VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 104049

Last change on this file since 104049 was 104049, checked in by vboxsync, 8 months ago

VMM/IEM: Mark the SIMD register as dirty based on the load size in iemNativeSimdRegAllocTmpForGuestSimdReg() so it doesn't need to be done in each native emitter, add some additional debug info about dirty SIMD registers, bugref:10614 [build fix]

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 460.0 KB
1/* $Id: IEMAllN8veRecompiler.cpp 104049 2024-03-25 21:08:40Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down the configs here to avoid wasting time on unused ones.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation restricts page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
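/* Illustration of the unit rounding these two defines imply (mirrors the
 * arithmetic in iemExecMemAllocatorAllocInChunk further down, shown here only
 * as a worked example):
 *      cReqUnits = (cbReq + 128 - 1) >> 7
 *          cbReq = 128  ->  1 unit  (128 bytes)
 *          cbReq = 129  ->  2 units (256 bytes)
 *          cbReq = 300  ->  3 units (384 bytes)
 */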
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity, they are allocated as one continuous block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits.
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
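/* Worked example for the scan above (illustration only): with idxFirst = 0,
 * cReqUnits = 2 and a bitmap whose low byte is 0010 0111 (bit 0 = lowest unit),
 * ASMBitFirstClear returns 3 and bit 4 is also clear, so bits 3..4 are marked
 * allocated and the caller gets pvChunk + (3 << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT),
 * i.e. pvChunk + 384.
 */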
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
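/*
 * A minimal sketch of the usage pattern implied by the darwin notes above
 * (illustration only; pabNativeCode and cbCode are made-up names, the real
 * caller is the native recompiler further down in this file):
 */
#if 0
    uint8_t *pbDst = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pbDst)
    {
        memcpy(pbDst, pabNativeCode, cbCode);                 /* pages are RW at this point */
        iemExecMemAllocatorReadyForUse(pVCpu, pbDst, cbCode); /* flip to RX + icache flush on darwin */
    }
#endif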
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
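/* Worked example (illustration only): freeing a 384 byte block at pvChunk + 0x1000
 * gives idxFirst = 0x1000 >> 7 = 32 and cReqUnits = 384 >> 7 = 3, so bits 32..34 of
 * the chunk's allocation bitmap are verified to be set and then cleared again. */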
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here, both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
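/* A few sample encodings produced by the helper above (the -8 data alignment
 * factor emitted for the CIE below is one of them):
 *      iemDwarfPutLeb128(Ptr,    8)  ->  0x08
 *      iemDwarfPutLeb128(Ptr,   -8)  ->  0x78
 *      iemDwarfPutLeb128(Ptr,  100)  ->  0xe4 0x00
 *      iemDwarfPutLeb128(Ptr, -120)  ->  0x88 0x7f
 */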
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
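/* Sample ULEB128 encodings from the helper above:
 *      iemDwarfPutUleb128(Ptr,  16)  ->  0x10
 *      iemDwarfPutUleb128(Ptr, 127)  ->  0x7f
 *      iemDwarfPutUleb128(Ptr, 300)  ->  0xac 0x02
 */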
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
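 /* Worked example of the adjustment just described (illustration only):
  * cbReq = 200 gives RT_ALIGN_32(200 + 32, 64) - 32 = 224, so the 32 byte
  * block header plus the 224 byte user area span exactly 256 bytes and the
  * next block's user area again starts on a 64 byte boundary. */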
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
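 /* Two examples of the sizing above (illustration only): cbMax = 512M yields
  * cbChunk = 64M and cMaxChunks = 8; cbMax = 8M yields cbChunk = 4M and
  * cMaxChunks = 2 (cbMax is rounded up to a whole number of chunks). */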
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
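    /* Sizing example (editor's illustration; the real unit size is whatever
       IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT evaluates to): with a 64M chunk and a
       hypothetical 256 byte allocation unit (shift 8), a chunk holds 256K units and
       its allocation bitmap needs 32KB (one bit per unit, eight bits per byte). */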
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
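/*
 * Usage sketch (editor's illustration; the real caller and the sizes come from the
 * IEM initialization code and its configuration, not from this file):
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, 64*_1M, 16*_1M, 0);
 *      AssertLogRelRCReturn(rc, rc);
 *
 * Passing 0 or UINT32_MAX for cbChunk selects the cbMax dependent default above.
 */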
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
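/* Note (editor's summary): the TB only calls the helper above after the recompiled
   CR0/CR4 check from IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT has already decided an
   exception is due, so the choice left here is between #UD (CR0.EM set or
   CR4.OSFXSR clear) and #NM for what is presumably the remaining CR0.TS case. */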
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
1643 *
1644 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
1645 */
1646IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
1647{
1648 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
1649 iemRaiseSimdFpExceptionJmp(pVCpu);
1650 else
1651 iemRaiseUndefinedOpcodeJmp(pVCpu);
1652#ifndef _MSC_VER
1653 return VINF_IEM_RAISED_XCPT; /* not reached */
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code when it wants to raise a \#NM.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1662{
1663 iemRaiseDeviceNotAvailableJmp(pVCpu);
1664#ifndef _MSC_VER
1665 return VINF_IEM_RAISED_XCPT; /* not reached */
1666#endif
1667}
1668
1669
1670/**
1671 * Used by TB code when it wants to raise a \#GP(0).
1672 */
1673IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1674{
1675 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1676#ifndef _MSC_VER
1677 return VINF_IEM_RAISED_XCPT; /* not reached */
1678#endif
1679}
1680
1681
1682/**
1683 * Used by TB code when it wants to raise a \#MF.
1684 */
1685IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1686{
1687 iemRaiseMathFaultJmp(pVCpu);
1688#ifndef _MSC_VER
1689 return VINF_IEM_RAISED_XCPT; /* not reached */
1690#endif
1691}
1692
1693
1694/**
1695 * Used by TB code when it wants to raise a \#XF.
1696 */
1697IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1698{
1699 iemRaiseSimdFpExceptionJmp(pVCpu);
1700#ifndef _MSC_VER
1701 return VINF_IEM_RAISED_XCPT; /* not reached */
1702#endif
1703}
1704
1705
1706/**
1707 * Used by TB code when detecting opcode changes.
1708 * @see iemThreadedFuncWorkerObsoleteTb
1709 */
1710IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1711{
1712 /* We set fSafeToFree to false because we're being called in the context of a
1713 TB callback function, which for native TBs means we cannot release the
1714 executable memory until we've returned all the way back to iemTbExec, as
1715 that return path goes via the native code generated for the TB. */
1716 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1717 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1718 return VINF_IEM_REEXEC_BREAK;
1719}
1720
1721
1722/**
1723 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1724 */
1725IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1726{
1727 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1728 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1729 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1730 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1731 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1732 return VINF_IEM_REEXEC_BREAK;
1733}
1734
1735
1736/**
1737 * Used by TB code when we missed a PC check after a branch.
1738 */
1739IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1740{
1741 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1742 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1743 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1744 pVCpu->iem.s.pbInstrBuf));
1745 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1746 return VINF_IEM_REEXEC_BREAK;
1747}
1748
1749
1750
1751/*********************************************************************************************************************************
1752* Helpers: Segmented memory fetches and stores. *
1753*********************************************************************************************************************************/
1754
1755/**
1756 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1757 */
1758IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1759{
1760#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1761 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1762#else
1763 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1764#endif
1765}
1766
1767
1768/**
1769 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1770 * to 16 bits.
1771 */
1772IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1773{
1774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1775 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1776#else
1777 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1778#endif
1779}
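/* The cast chain above first sign-extends within the target width and then
   zero-extends into the 64-bit return register. Worked example: a fetched byte of
   0x80 becomes (int8_t)-128 -> (int16_t)0xFF80 -> (uint16_t)0xFF80 and finally
   (uint64_t)0x000000000000FF80. The 32- and 64-bit widening variants below follow
   the same pattern. */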
1780
1781
1782/**
1783 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1784 * to 32 bits.
1785 */
1786IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1787{
1788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1789 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1790#else
1791 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1792#endif
1793}
1794
1795/**
1796 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1797 * to 64 bits.
1798 */
1799IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1800{
1801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1802 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1803#else
1804 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1805#endif
1806}
1807
1808
1809/**
1810 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1824 * to 32 bits.
1825 */
1826IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1827{
1828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1829 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1830#else
1831 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1832#endif
1833}
1834
1835
1836/**
1837 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1838 * to 64 bits.
1839 */
1840IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1841{
1842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1843 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1844#else
1845 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1846#endif
1847}
1848
1849
1850/**
1851 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1852 */
1853IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1854{
1855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1856 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1857#else
1858 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1859#endif
1860}
1861
1862
1863/**
1864 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1865 * to 64 bits.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1870 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1871#else
1872 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1883 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1884#else
1885 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1886#endif
1887}
1888
1889
1890#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1891/**
1892 * Used by TB code to load 128-bit data w/ segmentation.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1897 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1898#else
1899 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment restrictions.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1910 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1911#else
1912 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to load 128-bit data w/ segmentation, without alignment checks.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1923 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1924#else
1925 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to load 256-bit data w/ segmentation, without alignment checks.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1936 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1937#else
1938 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment restrictions.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1947{
1948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1949 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1950#else
1951 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1952#endif
1953}
1954#endif
1955
1956
1957/**
1958 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1963 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1964#else
1965 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1976 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1977#else
1978 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1989 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1990#else
1991 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1998 */
1999IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
2000{
2001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2002 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
2003#else
2004 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
2005#endif
2006}
2007
2008
2009#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2010/**
2011 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment restrictions.
2012 */
2013IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2014{
2015#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2016 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2017#else
2018 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2019#endif
2020}
2021
2022
2023/**
2024 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checks.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2029 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2030#else
2031 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checks.
2038 */
2039IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2040{
2041#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2042 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2043#else
2044 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2045#endif
2046}
2047
2048
2049/**
2050 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing AVX alignment restrictions.
2051 */
2052IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2053{
2054#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2055 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2056#else
2057 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2058#endif
2059}
2060#endif
2061
2062
2063
2064/**
2065 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
2066 */
2067IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2068{
2069#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2070 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2071#else
2072 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2073#endif
2074}
2075
2076
2077/**
2078 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
2079 */
2080IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2081{
2082#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2083 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2084#else
2085 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2086#endif
2087}
2088
2089
2090/**
2091 * Used by TB code to store an 32-bit selector value onto a generic stack.
2092 * Used by TB code to store a 32-bit selector value onto a generic stack.
2093 *
2094 * Intel CPUs don't write a whole dword, thus the special function.
2095IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2096{
2097#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2098 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2099#else
2100 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2101#endif
2102}
2103
2104
2105/**
2106 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
2107 */
2108IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2109{
2110#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2111 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2112#else
2113 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2114#endif
2115}
2116
2117
2118/**
2119 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2120 */
2121IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2122{
2123#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2124 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2125#else
2126 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2127#endif
2128}
2129
2130
2131/**
2132 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2133 */
2134IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2135{
2136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2137 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2138#else
2139 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2140#endif
2141}
2142
2143
2144/**
2145 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2146 */
2147IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2148{
2149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2150 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2151#else
2152 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2153#endif
2154}
2155
2156
2157
2158/*********************************************************************************************************************************
2159* Helpers: Flat memory fetches and stores. *
2160*********************************************************************************************************************************/
2161
2162/**
2163 * Used by TB code to load unsigned 8-bit data w/ flat address.
2164 * @note Zero extending the value to 64-bit to simplify assembly.
2165 */
2166IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2167{
2168#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2169 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2170#else
2171 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2172#endif
2173}
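/* Note (editor's reading of the code above and below): in the
   IEMNATIVE_WITH_TLB_LOOKUP_FETCH paths these flat helpers pass UINT8_MAX as the
   segment register index, which the segmented Safe workers treat as "no
   segmentation", i.e. a flat address. */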
2174
2175
2176/**
2177 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2178 * to 16 bits.
2179 * @note Zero extending the value to 64-bit to simplify assembly.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2182{
2183#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2184 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2185#else
2186 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2187#endif
2188}
2189
2190
2191/**
2192 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2193 * to 32 bits.
2194 * @note Zero extending the value to 64-bit to simplify assembly.
2195 */
2196IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2197{
2198#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2199 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2200#else
2201 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2202#endif
2203}
2204
2205
2206/**
2207 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2208 * to 64 bits.
2209 */
2210IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2211{
2212#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2213 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2214#else
2215 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2216#endif
2217}
2218
2219
2220/**
2221 * Used by TB code to load unsigned 16-bit data w/ flat address.
2222 * @note Zero extending the value to 64-bit to simplify assembly.
2223 */
2224IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2225{
2226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2227 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2228#else
2229 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2230#endif
2231}
2232
2233
2234/**
2235 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2236 * to 32 bits.
2237 * @note Zero extending the value to 64-bit to simplify assembly.
2238 */
2239IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2240{
2241#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2242 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2243#else
2244 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2245#endif
2246}
2247
2248
2249/**
2250 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2251 * to 64 bits.
2252 * @note Zero extending the value to 64-bit to simplify assembly.
2253 */
2254IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2255{
2256#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2257 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2258#else
2259 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2260#endif
2261}
2262
2263
2264/**
2265 * Used by TB code to load unsigned 32-bit data w/ flat address.
2266 * @note Zero extending the value to 64-bit to simplify assembly.
2267 */
2268IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2269{
2270#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2271 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2272#else
2273 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2274#endif
2275}
2276
2277
2278/**
2279 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2280 * to 64 bits.
2281 * @note Zero extending the value to 64-bit to simplify assembly.
2282 */
2283IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2284{
2285#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2286 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2287#else
2288 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2289#endif
2290}
2291
2292
2293/**
2294 * Used by TB code to load unsigned 64-bit data w/ flat address.
2295 */
2296IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2297{
2298#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2299 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2300#else
2301 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2302#endif
2303}
2304
2305
2306#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2307/**
2308 * Used by TB code to load unsigned 128-bit data w/ flat address.
2309 */
2310IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2311{
2312#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2313 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2314#else
2315 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2316#endif
2317}
2318
2319
2320/**
2321 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment restrictions.
2322 */
2323IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2324{
2325#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2326 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2327#else
2328 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2329#endif
2330}
2331
2332
2333/**
2334 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checks.
2335 */
2336IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2337{
2338#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2339 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2340#else
2341 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2342#endif
2343}
2344
2345
2346/**
2347 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checks.
2348 */
2349IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2350{
2351#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2352 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2353#else
2354 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2355#endif
2356}
2357
2358
2359/**
2360 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment restrictions.
2361 */
2362IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2363{
2364#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2365 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2366#else
2367 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2368#endif
2369}
2370#endif
2371
2372
2373/**
2374 * Used by TB code to store unsigned 8-bit data w/ flat address.
2375 */
2376IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2377{
2378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2379 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2380#else
2381 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2382#endif
2383}
2384
2385
2386/**
2387 * Used by TB code to store unsigned 16-bit data w/ flat address.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2390{
2391#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2392 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2393#else
2394 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2395#endif
2396}
2397
2398
2399/**
2400 * Used by TB code to store unsigned 32-bit data w/ flat address.
2401 */
2402IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2403{
2404#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2405 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2406#else
2407 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2408#endif
2409}
2410
2411
2412/**
2413 * Used by TB code to store unsigned 64-bit data w/ flat address.
2414 */
2415IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2416{
2417#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2418 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2419#else
2420 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2421#endif
2422}
2423
2424
2425#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2426/**
2427 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment restrictions.
2428 */
2429IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2432 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2433#else
2434 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checks.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2445 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2446#else
2447 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checks.
2454 */
2455IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2456{
2457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2458 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2459#else
2460 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
2461#endif
2462}
2463
2464
2465/**
2466 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment restrictions.
2467 */
2468IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2469{
2470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2471 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2472#else
2473 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
2474#endif
2475}
2476#endif
2477
2478
2479
2480/**
2481 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2482 */
2483IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2484{
2485#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2486 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2487#else
2488 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2489#endif
2490}
2491
2492
2493/**
2494 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2495 */
2496IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2497{
2498#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2499 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2500#else
2501 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2502#endif
2503}
2504
2505
2506/**
2507 * Used by TB code to store a segment selector value onto a flat stack.
2508 *
2509 * Intel CPUs don't write a whole dword, thus the special function.
2510 */
2511IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2512{
2513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2514 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2515#else
2516 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2517#endif
2518}
2519
2520
2521/**
2522 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2523 */
2524IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2525{
2526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2527 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2528#else
2529 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2530#endif
2531}
2532
2533
2534/**
2535 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2536 */
2537IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2538{
2539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2540 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2541#else
2542 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2543#endif
2544}
2545
2546
2547/**
2548 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2551{
2552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2553 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2554#else
2555 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2556#endif
2557}
2558
2559
2560/**
2561 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2562 */
2563IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2564{
2565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2566 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2567#else
2568 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2569#endif
2570}
2571
2572
2573
2574/*********************************************************************************************************************************
2575* Helpers: Segmented memory mapping. *
2576*********************************************************************************************************************************/
2577
2578/**
2579 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2580 * segmentation.
2581 */
2582IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2583 RTGCPTR GCPtrMem, uint8_t iSegReg))
2584{
2585#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2586 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2587#else
2588 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2589#endif
2590}
2591
2592
2593/**
2594 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2595 */
2596IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2597 RTGCPTR GCPtrMem, uint8_t iSegReg))
2598{
2599#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2600 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2601#else
2602 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2603#endif
2604}
2605
2606
2607/**
2608 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2609 */
2610IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2611 RTGCPTR GCPtrMem, uint8_t iSegReg))
2612{
2613#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2614 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2615#else
2616 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2617#endif
2618}
2619
2620
2621/**
2622 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2623 */
2624IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2625 RTGCPTR GCPtrMem, uint8_t iSegReg))
2626{
2627#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2628 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2629#else
2630 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2631#endif
2632}
2633
2634
2635/**
2636 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2637 * segmentation.
2638 */
2639IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2640 RTGCPTR GCPtrMem, uint8_t iSegReg))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2644#else
2645 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2654 RTGCPTR GCPtrMem, uint8_t iSegReg))
2655{
2656#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2657 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2658#else
2659 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2660#endif
2661}
2662
2663
2664/**
2665 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2666 */
2667IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2668 RTGCPTR GCPtrMem, uint8_t iSegReg))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2672#else
2673 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2682 RTGCPTR GCPtrMem, uint8_t iSegReg))
2683{
2684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2685 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2686#else
2687 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2688#endif
2689}
2690
2691
2692/**
2693 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2694 * segmentation.
2695 */
2696IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2697 RTGCPTR GCPtrMem, uint8_t iSegReg))
2698{
2699#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2700 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2701#else
2702 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2703#endif
2704}
2705
2706
2707/**
2708 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2709 */
2710IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2711 RTGCPTR GCPtrMem, uint8_t iSegReg))
2712{
2713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2714 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2715#else
2716 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2717#endif
2718}
2719
2720
2721/**
2722 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2723 */
2724IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2725 RTGCPTR GCPtrMem, uint8_t iSegReg))
2726{
2727#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2728 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2729#else
2730 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2731#endif
2732}
2733
2734
2735/**
2736 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2737 */
2738IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2739 RTGCPTR GCPtrMem, uint8_t iSegReg))
2740{
2741#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2742 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2743#else
2744 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2745#endif
2746}
2747
2748
2749/**
2750 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2751 * segmentation.
2752 */
2753IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2754 RTGCPTR GCPtrMem, uint8_t iSegReg))
2755{
2756#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2757 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2758#else
2759 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2760#endif
2761}
2762
2763
2764/**
2765 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2766 */
2767IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2768 RTGCPTR GCPtrMem, uint8_t iSegReg))
2769{
2770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2771 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2772#else
2773 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2774#endif
2775}
2776
2777
2778/**
2779 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2780 */
2781IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2782 RTGCPTR GCPtrMem, uint8_t iSegReg))
2783{
2784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2785 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2786#else
2787 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2788#endif
2789}
2790
2791
2792/**
2793 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2794 */
2795IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2796 RTGCPTR GCPtrMem, uint8_t iSegReg))
2797{
2798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2799 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2800#else
2801 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2802#endif
2803}
2804
2805
2806/**
2807 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2808 */
2809IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2810 RTGCPTR GCPtrMem, uint8_t iSegReg))
2811{
2812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2813 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2814#else
2815 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2816#endif
2817}
2818
2819
2820/**
2821 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2822 */
2823IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2824 RTGCPTR GCPtrMem, uint8_t iSegReg))
2825{
2826#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2827 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2828#else
2829 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2830#endif
2831}
2832
2833
2834/**
2835 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2836 * segmentation.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2839 RTGCPTR GCPtrMem, uint8_t iSegReg))
2840{
2841#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2842 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2843#else
2844 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2845#endif
2846}
2847
2848
2849/**
2850 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2851 */
2852IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2853 RTGCPTR GCPtrMem, uint8_t iSegReg))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2857#else
2858 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2867 RTGCPTR GCPtrMem, uint8_t iSegReg))
2868{
2869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2870 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2871#else
2872 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2873#endif
2874}
2875
2876
2877/**
2878 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2879 */
2880IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2881 RTGCPTR GCPtrMem, uint8_t iSegReg))
2882{
2883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2884 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2885#else
2886 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2887#endif
2888}
2889
2890
2891/*********************************************************************************************************************************
2892* Helpers: Flat memory mapping. *
2893*********************************************************************************************************************************/
2894
2895/**
2896 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2897 * address.
2898 */
2899IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2900{
2901#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2902 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2903#else
2904 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2905#endif
2906}
2907
2908
2909/**
2910 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2911 */
2912IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2913{
2914#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2915 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2916#else
2917 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2918#endif
2919}
2920
2921
2922/**
2923 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2924 */
2925IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2926{
2927#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2928 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2929#else
2930 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2931#endif
2932}
2933
2934
2935/**
2936 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2937 */
2938IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2939{
2940#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2941 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2942#else
2943 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2944#endif
2945}
2946
2947
2948/**
2949 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2950 * address.
2951 */
2952IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2953{
2954#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2955 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2956#else
2957 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2958#endif
2959}
2960
2961
2962/**
2963 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2964 */
2965IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2966{
2967#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2968 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2969#else
2970 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2971#endif
2972}
2973
2974
2975/**
2976 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2977 */
2978IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2979{
2980#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2981 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2982#else
2983 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2984#endif
2985}
2986
2987
2988/**
2989 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2990 */
2991IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2992{
2993#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2994 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2995#else
2996 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2997#endif
2998}
2999
3000
3001/**
3002 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
3003 * address.
3004 */
3005IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3006{
3007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3008 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3009#else
3010 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3011#endif
3012}
3013
3014
3015/**
3016 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
3017 */
3018IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3019{
3020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3021 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3022#else
3023 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3024#endif
3025}
3026
3027
3028/**
3029 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
3030 */
3031IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3032{
3033#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3034 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3035#else
3036 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3037#endif
3038}
3039
3040
3041/**
3042 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
3043 */
3044IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3045{
3046#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3047 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3048#else
3049 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3050#endif
3051}
3052
3053
3054/**
3055 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
3056 * address.
3057 */
3058IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3059{
3060#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3061 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3062#else
3063 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3064#endif
3065}
3066
3067
3068/**
3069 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
3070 */
3071IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3072{
3073#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3074 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3075#else
3076 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3077#endif
3078}
3079
3080
3081/**
3082 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
3083 */
3084IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3085{
3086#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3087 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3088#else
3089 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3090#endif
3091}
3092
3093
3094/**
3095 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3096 */
3097IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3098{
3099#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3100 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3101#else
3102 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3103#endif
3104}
3105
3106
3107/**
3108 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3109 */
3110IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3111{
3112#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3113 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3114#else
3115 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3116#endif
3117}
3118
3119
3120/**
3121 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3122 */
3123IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3124{
3125#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3126 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3127#else
3128 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3129#endif
3130}
3131
3132
3133/**
3134 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3135 * address.
3136 */
3137IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3138{
3139#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3140 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3141#else
3142 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3143#endif
3144}
3145
3146
3147/**
3148 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3149 */
3150IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3151{
3152#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3153 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3154#else
3155 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3156#endif
3157}
3158
3159
3160/**
3161 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3162 */
3163IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3164{
3165#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3166 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3167#else
3168 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3169#endif
3170}
3171
3172
3173/**
3174 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3175 */
3176IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3177{
3178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3179 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3180#else
3181 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3182#endif
3183}
3184
3185
3186/*********************************************************************************************************************************
3187* Helpers: Commit, rollback & unmap *
3188*********************************************************************************************************************************/
3189
3190/**
3191 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3192 */
3193IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3194{
3195 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3196}
3197
3198
3199/**
3200 * Used by TB code to commit and unmap a read-write memory mapping.
3201 */
3202IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3203{
3204 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3205}
3206
3207
3208/**
3209 * Used by TB code to commit and unmap a write-only memory mapping.
3210 */
3211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3212{
3213 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3214}
3215
3216
3217/**
3218 * Used by TB code to commit and unmap a read-only memory mapping.
3219 */
3220IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3221{
3222 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3223}
3224
3225
3226/**
3227 * Reinitializes the native recompiler state.
3228 *
3229 * Called before starting a new recompile job.
3230 */
3231static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3232{
3233 pReNative->cLabels = 0;
3234 pReNative->bmLabelTypes = 0;
3235 pReNative->cFixups = 0;
3236#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3237 pReNative->pDbgInfo->cEntries = 0;
3238#endif
3239 pReNative->pTbOrg = pTb;
3240 pReNative->cCondDepth = 0;
3241 pReNative->uCondSeqNo = 0;
3242 pReNative->uCheckIrqSeqNo = 0;
3243 pReNative->uTlbSeqNo = 0;
3244
3245#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3246 pReNative->Core.offPc = 0;
3247 pReNative->Core.cInstrPcUpdateSkipped = 0;
3248#endif
3249#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3250 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3251#endif
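    /* Mark all fixed registers, and any bit positions beyond the actual host
       register count, as allocated so the register allocator never hands them out. */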
3252 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3253#if IEMNATIVE_HST_GREG_COUNT < 32
3254 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3255#endif
3256 ;
3257 pReNative->Core.bmHstRegsWithGstShadow = 0;
3258 pReNative->Core.bmGstRegShadows = 0;
3259#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3260 pReNative->Core.bmGstRegShadowDirty = 0;
3261#endif
3262 pReNative->Core.bmVars = 0;
3263 pReNative->Core.bmStack = 0;
3264 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3265 pReNative->Core.u64ArgVars = UINT64_MAX;
3266
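    /* Reset the lookup cache for unique (single instance) labels; UINT32_MAX means the label hasn't been created yet. */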
3267 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
3268 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3269 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3270 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3271 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3272 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3273 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3274 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3275 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3276 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3277 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3278 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3279 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3280 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3281 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3282 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3283 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3284 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
3285
3286 /* Full host register reinit: */
3287 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3288 {
3289 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3290 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3291 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3292 }
3293
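    /* Mark any fixed register that isn't given a specific role below as reserved. */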
3294 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3295 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3296#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3297 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3298#endif
3299#ifdef IEMNATIVE_REG_FIXED_TMP0
3300 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3301#endif
3302#ifdef IEMNATIVE_REG_FIXED_TMP1
3303 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3304#endif
3305#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3306 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3307#endif
3308 );
3309 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3310 {
3311 fRegs &= ~RT_BIT_32(idxReg);
3312 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3313 }
3314
3315 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3316#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3317 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3318#endif
3319#ifdef IEMNATIVE_REG_FIXED_TMP0
3320 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3321#endif
3322#ifdef IEMNATIVE_REG_FIXED_TMP1
3323 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3324#endif
3325#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3326 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3327#endif
3328
3329#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3330 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3331# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3332 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3333# endif
3334 ;
3335 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3336 pReNative->Core.bmGstSimdRegShadows = 0;
3337 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3338 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3339
3340 /* Full host register reinit: */
3341 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3342 {
3343 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3344 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3345 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3346 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3347 }
3348
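    /* All fixed SIMD registers start out as reserved; the fixed temporary (if defined) gets its role below. */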
3349 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
3350 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3351 {
3352 fRegs &= ~RT_BIT_32(idxReg);
3353 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3354 }
3355
3356#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3357 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3358#endif
3359
3360#endif
3361
3362 return pReNative;
3363}
3364
3365
3366/**
3367 * Allocates and initializes the native recompiler state.
3368 *
3369 * This is called the first time an EMT wants to recompile something.
3370 *
3371 * @returns Pointer to the new recompiler state.
3372 * @param pVCpu The cross context virtual CPU structure of the calling
3373 * thread.
3374 * @param pTb The TB that's about to be recompiled.
3375 * @thread EMT(pVCpu)
3376 */
3377static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3378{
3379 VMCPU_ASSERT_EMT(pVCpu);
3380
3381 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3382 AssertReturn(pReNative, NULL);
3383
3384 /*
3385 * Try allocate all the buffers and stuff we need.
3386 */
3387 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3388 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3389 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3390#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3391 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3392#endif
3393 if (RT_LIKELY( pReNative->pInstrBuf
3394 && pReNative->paLabels
3395 && pReNative->paFixups)
3396#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3397 && pReNative->pDbgInfo
3398#endif
3399 )
3400 {
3401 /*
3402 * Set the buffer & array sizes on success.
3403 */
3404 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3405 pReNative->cLabelsAlloc = _8K;
3406 pReNative->cFixupsAlloc = _16K;
3407#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3408 pReNative->cDbgInfoAlloc = _16K;
3409#endif
3410
3411 /* Other constant stuff: */
3412 pReNative->pVCpu = pVCpu;
3413
3414 /*
3415 * Done, just need to save it and reinit it.
3416 */
3417 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3418 return iemNativeReInit(pReNative, pTb);
3419 }
3420
3421 /*
3422 * Failed. Cleanup and return.
3423 */
3424 AssertFailed();
3425 RTMemFree(pReNative->pInstrBuf);
3426 RTMemFree(pReNative->paLabels);
3427 RTMemFree(pReNative->paFixups);
3428#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3429 RTMemFree(pReNative->pDbgInfo);
3430#endif
3431 RTMemFree(pReNative);
3432 return NULL;
3433}
3434
3435
3436/**
3437 * Creates a label.
3438 *
3439 * If the label does not yet have a defined position,
3440 * call iemNativeLabelDefine() later to set it.
3441 *
3442 * @returns Label ID. Throws VBox status code on failure, so no need to check
3443 * the return value.
3444 * @param pReNative The native recompile state.
3445 * @param enmType The label type.
3446 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3447 * label is not yet defined (default).
3448 * @param uData Data associated with the label. Only applicable to
3449 * certain types of labels. Default is zero.
3450 */
3451DECL_HIDDEN_THROW(uint32_t)
3452iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3453 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3454{
3455 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3456
3457 /*
3458 * Locate existing label definition.
3459 *
3460 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3461 * and uData is zero.
3462 */
3463 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3464 uint32_t const cLabels = pReNative->cLabels;
3465 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3466#ifndef VBOX_STRICT
3467 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3468 && offWhere == UINT32_MAX
3469 && uData == 0
3470#endif
3471 )
3472 {
3473#ifndef VBOX_STRICT
3474 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3475 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3476 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3477 if (idxLabel < pReNative->cLabels)
3478 return idxLabel;
3479#else
3480 for (uint32_t i = 0; i < cLabels; i++)
3481 if ( paLabels[i].enmType == enmType
3482 && paLabels[i].uData == uData)
3483 {
3484 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3485 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3486 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3487 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3488 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3489 return i;
3490 }
3491 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3492 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3493#endif
3494 }
3495
3496 /*
3497 * Make sure we've got room for another label.
3498 */
3499 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3500 { /* likely */ }
3501 else
3502 {
3503 uint32_t cNew = pReNative->cLabelsAlloc;
3504 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3505 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3506 cNew *= 2;
3507 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
3508 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3509 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3510 pReNative->paLabels = paLabels;
3511 pReNative->cLabelsAlloc = cNew;
3512 }
3513
3514 /*
3515 * Define a new label.
3516 */
3517 paLabels[cLabels].off = offWhere;
3518 paLabels[cLabels].enmType = enmType;
3519 paLabels[cLabels].uData = uData;
3520 pReNative->cLabels = cLabels + 1;
3521
3522 Assert((unsigned)enmType < 64);
3523 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3524
3525 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3526 {
3527 Assert(uData == 0);
3528 pReNative->aidxUniqueLabels[enmType] = cLabels;
3529 }
3530
3531 if (offWhere != UINT32_MAX)
3532 {
3533#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3534 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3535 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3536#endif
3537 }
3538 return cLabels;
3539}
3540
3541
3542/**
3543 * Defines the location of an existing label.
3544 *
3545 * @param pReNative The native recompile state.
3546 * @param idxLabel The label to define.
3547 * @param offWhere The position.
3548 */
3549DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3550{
3551 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3552 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3553 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3554 pLabel->off = offWhere;
3555#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3556 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3557 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3558#endif
3559}
3560
3561
3562/**
3563 * Looks up a label.
3564 *
3565 * @returns Label ID if found, UINT32_MAX if not.
3566 */
3567static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3568 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3569{
3570 Assert((unsigned)enmType < 64);
3571 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3572 {
3573 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3574 return pReNative->aidxUniqueLabels[enmType];
3575
3576 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3577 uint32_t const cLabels = pReNative->cLabels;
3578 for (uint32_t i = 0; i < cLabels; i++)
3579 if ( paLabels[i].enmType == enmType
3580 && paLabels[i].uData == uData
3581 && ( paLabels[i].off == offWhere
3582 || offWhere == UINT32_MAX
3583 || paLabels[i].off == UINT32_MAX))
3584 return i;
3585 }
3586 return UINT32_MAX;
3587}
3588
3589
3590/**
3591 * Adds a fixup.
3592 *
3593 * @throws VBox status code (int) on failure.
3594 * @param pReNative The native recompile state.
3595 * @param offWhere The instruction offset of the fixup location.
3596 * @param idxLabel The target label ID for the fixup.
3597 * @param enmType The fixup type.
3598 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3599 */
3600DECL_HIDDEN_THROW(void)
3601iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3602 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3603{
3604 Assert(idxLabel <= UINT16_MAX);
3605 Assert((unsigned)enmType <= UINT8_MAX);
3606#ifdef RT_ARCH_ARM64
3607 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3608 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3609 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3610#endif
3611
3612 /*
3613 * Make sure we've got room for another fixup.
3614 */
3615 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3616 uint32_t const cFixups = pReNative->cFixups;
3617 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3618 { /* likely */ }
3619 else
3620 {
3621 uint32_t cNew = pReNative->cFixupsAlloc;
3622 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3623 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3624 cNew *= 2;
3625 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3626 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3627 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3628 pReNative->paFixups = paFixups;
3629 pReNative->cFixupsAlloc = cNew;
3630 }
3631
3632 /*
3633 * Add the fixup.
3634 */
3635 paFixups[cFixups].off = offWhere;
3636 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3637 paFixups[cFixups].enmType = enmType;
3638 paFixups[cFixups].offAddend = offAddend;
3639 pReNative->cFixups = cFixups + 1;
3640}
3641
3642
3643/**
3644 * Slow code path for iemNativeInstrBufEnsure.
3645 */
3646DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3647{
3648 /* Double the buffer size till we meet the request. */
3649 uint32_t cNew = pReNative->cInstrBufAlloc;
3650 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3651 do
3652 cNew *= 2;
3653 while (cNew < off + cInstrReq);
3654
3655 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3656#ifdef RT_ARCH_ARM64
3657 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3658#else
3659 uint32_t const cbMaxInstrBuf = _2M;
3660#endif
3661 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3662
3663 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3664 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3665
3666#ifdef VBOX_STRICT
3667 pReNative->offInstrBufChecked = off + cInstrReq;
3668#endif
3669 pReNative->cInstrBufAlloc = cNew;
3670 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3671}
3672
3673#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3674
3675/**
3676 * Grows the static debug info array used during recompilation.
3677 *
3678 * @returns Pointer to the new debug info block; throws VBox status code on
3679 * failure, so no need to check the return value.
3680 */
3681DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3682{
3683 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3684 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3685 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3686 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3687 pReNative->pDbgInfo = pDbgInfo;
3688 pReNative->cDbgInfoAlloc = cNew;
3689 return pDbgInfo;
3690}
3691
3692
3693/**
3694 * Adds a new debug info uninitialized entry, returning the pointer to it.
3695 */
3696DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3697{
3698 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3699 { /* likely */ }
3700 else
3701 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3702 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3703}
3704
3705
3706/**
3707 * Debug Info: Adds a native offset record, if necessary.
3708 */
3709DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3710{
3711 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3712
3713 /*
3714 * Search backwards to see if we've got a similar record already.
3715 */
3716 uint32_t idx = pDbgInfo->cEntries;
3717 uint32_t idxStop = idx > 16 ? idx - 16 : 0;
3718 while (idx-- > idxStop)
3719 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3720 {
3721 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3722 return;
3723 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3724 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3725 break;
3726 }
3727
3728 /*
3729 * Add it.
3730 */
3731 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3732 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3733 pEntry->NativeOffset.offNative = off;
3734}
3735
3736
3737/**
3738 * Debug Info: Record info about a label.
3739 */
3740static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3741{
3742 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3743 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3744 pEntry->Label.uUnused = 0;
3745 pEntry->Label.enmLabel = (uint8_t)enmType;
3746 pEntry->Label.uData = uData;
3747}
3748
3749
3750/**
3751 * Debug Info: Record info about a threaded call.
3752 */
3753static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3754{
3755 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3756 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3757 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3758 pEntry->ThreadedCall.uUnused = 0;
3759 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3760}
3761
3762
3763/**
3764 * Debug Info: Record info about a new guest instruction.
3765 */
3766static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3767{
3768 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3769 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3770 pEntry->GuestInstruction.uUnused = 0;
3771 pEntry->GuestInstruction.fExec = fExec;
3772}
3773
3774
3775/**
3776 * Debug Info: Record info about guest register shadowing.
3777 */
3778DECL_HIDDEN_THROW(void)
3779iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3780 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3781{
3782 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3783 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3784 pEntry->GuestRegShadowing.uUnused = 0;
3785 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3786 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3787 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3788#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3789 Assert( idxHstReg != UINT8_MAX
3790 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
3791#endif
3792}
3793
3794
3795# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3796/**
3797 * Debug Info: Record info about guest SIMD register shadowing.
3798 */
3799DECL_HIDDEN_THROW(void)
3800iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3801 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3802{
3803 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3804 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3805 pEntry->GuestSimdRegShadowing.uUnused = 0;
3806 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3807 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3808 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3809}
3810# endif
3811
3812
3813# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3814/**
3815 * Debug Info: Record info about delayed RIP updates.
3816 */
3817DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3818{
3819 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3820 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3821 pEntry->DelayedPcUpdate.offPc = offPc;
3822 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3823}
3824# endif
3825
3826
3827# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3828/**
3829 * Debug Info: Record info about a dirty guest register.
3830 */
3831DECL_HIDDEN_THROW(void) iemNaitveDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
3832 uint8_t idxGstReg, uint8_t idxHstReg)
3833{
3834 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3835 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
3836 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
3837 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
3838 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
3839}
3840
3841
3842/**
3843 * Debug Info: Record info about a dirty guest register writeback operation.
3844 */
3845DECL_HIDDEN_THROW(void) iemNaitveDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
3846{
3847 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3848 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
3849 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
3850 pEntry->GuestRegWriteback.fGstReg = (uint32_t)fGstReg;
3851 /** @todo r=aeichner Can't fit the whole register mask in the debug info entry, deal with it when it becomes necessary. */
3852 Assert((uint64_t)pEntry->GuestRegWriteback.fGstReg == fGstReg);
3853}
3854# endif
3855
3856#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3857
3858
3859/*********************************************************************************************************************************
3860* Register Allocator *
3861*********************************************************************************************************************************/
3862
3863/**
3864 * Register parameter indexes (indexed by argument number).
3865 */
3866DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3867{
3868 IEMNATIVE_CALL_ARG0_GREG,
3869 IEMNATIVE_CALL_ARG1_GREG,
3870 IEMNATIVE_CALL_ARG2_GREG,
3871 IEMNATIVE_CALL_ARG3_GREG,
3872#if defined(IEMNATIVE_CALL_ARG4_GREG)
3873 IEMNATIVE_CALL_ARG4_GREG,
3874# if defined(IEMNATIVE_CALL_ARG5_GREG)
3875 IEMNATIVE_CALL_ARG5_GREG,
3876# if defined(IEMNATIVE_CALL_ARG6_GREG)
3877 IEMNATIVE_CALL_ARG6_GREG,
3878# if defined(IEMNATIVE_CALL_ARG7_GREG)
3879 IEMNATIVE_CALL_ARG7_GREG,
3880# endif
3881# endif
3882# endif
3883#endif
3884};
3885AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3886
3887/**
3888 * Call register masks indexed by argument count.
3889 */
3890DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3891{
3892 0,
3893 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3894 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3895 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3896 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3897 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3898#if defined(IEMNATIVE_CALL_ARG4_GREG)
3899 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3900 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3901# if defined(IEMNATIVE_CALL_ARG5_GREG)
3902 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3903 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3904# if defined(IEMNATIVE_CALL_ARG6_GREG)
3905 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3906 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3907 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3908# if defined(IEMNATIVE_CALL_ARG7_GREG)
3909 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3910 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3911 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3912# endif
3913# endif
3914# endif
3915#endif
3916};
3917
3918#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3919/**
3920 * BP offset of the stack argument slots.
3921 *
3922 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3923 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3924 */
3925DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3926{
3927 IEMNATIVE_FP_OFF_STACK_ARG0,
3928# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3929 IEMNATIVE_FP_OFF_STACK_ARG1,
3930# endif
3931# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3932 IEMNATIVE_FP_OFF_STACK_ARG2,
3933# endif
3934# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3935 IEMNATIVE_FP_OFF_STACK_ARG3,
3936# endif
3937};
3938AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3939#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3940
3941/**
3942 * Info about shadowed guest register values.
3943 * @see IEMNATIVEGSTREG
3944 */
3945DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3946{
3947#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3948 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3949 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3950 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3951 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3952 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3953 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3954 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3955 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3956 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3957 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3958 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3959 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3960 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3961 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3962 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3963 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3964 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3965 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3966 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3967 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3968 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3969 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3970 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3971 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3972 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3973 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3974 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3975 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3976 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3977 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3978 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3979 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3980 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3981 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3982 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3983 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3984 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3985 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3986 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3987 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3988 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3989 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3990 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3991 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3992 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3993 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3994 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3995 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3996#undef CPUMCTX_OFF_AND_SIZE
3997};
3998AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3999
4000
4001/** Host CPU general purpose register names. */
4002DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
4003{
4004#ifdef RT_ARCH_AMD64
4005 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
4006#elif defined(RT_ARCH_ARM64)
4007 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
4008 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
4009#else
4010# error "port me"
4011#endif
4012};
4013
4014
4015#if 0 /* unused */
4016/**
4017 * Tries to locate a suitable register in the given register mask.
4018 *
4019 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4020 * failed.
4021 *
4022 * @returns Host register number on success, returns UINT8_MAX on failure.
4023 */
4024static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
4025{
4026 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4027 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4028 if (fRegs)
4029 {
4030 /** @todo pick better here: */
4031 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
4032
4033 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4034 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4035 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4036 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4037
4038 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4039 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4040 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4041 return idxReg;
4042 }
4043 return UINT8_MAX;
4044}
4045#endif /* unused */
4046
4047
4048#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4049/**
4050 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
4051 *
4052 * @returns New code buffer offset; throws VBox status code on failure.
4053 * @param pReNative The native recompile state.
4054 * @param off The current code buffer position.
4055 * @param enmGstReg The guest register to store to.
4056 * @param idxHstReg The host register to store from.
4057 */
4058DECL_FORCE_INLINE_THROW(uint32_t)
4059iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
4060{
4061 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4062 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4063
4064 switch (g_aGstShadowInfo[enmGstReg].cb)
4065 {
4066 case sizeof(uint64_t):
4067 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4068 case sizeof(uint32_t):
4069 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4070 case sizeof(uint16_t):
4071 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4072#if 0 /* not present in the table. */
4073 case sizeof(uint8_t):
4074 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4075#endif
4076 default:
4077 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4078 }
4079}
4080
4081
4082/**
4083 * Emits code to flush a pending write of the given guest register if any.
4084 *
4085 * @returns New code buffer offset.
4086 * @param pReNative The native recompile state.
4087 * @param off Current code buffer position.
4088 * @param enmGstReg The guest register to flush.
4089 */
4090DECL_HIDDEN_THROW(uint32_t)
4091iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
4092{
4093 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4094
4095 Assert(enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast);
4096 Assert( idxHstReg != UINT8_MAX
4097 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
4098 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s\n",
4099 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4100
4101 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
4102
4103 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
4104 return off;
4105}
4106
4107
4108/**
4109 * Flush the given set of guest registers if marked as dirty.
4110 *
4111 * @returns New code buffer offset.
4112 * @param pReNative The native recompile state.
4113 * @param off Current code buffer position.
4114 * @param fFlushGstReg The guest register set to flush (default is flush everything).
4115 */
4116DECL_HIDDEN_THROW(uint32_t)
4117iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
4118{
4119 if (pReNative->Core.bmGstRegShadowDirty & fFlushGstReg)
4120 {
4121# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4122 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4123 iemNaitveDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fFlushGstReg);
4124# endif
4125
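        /* Walk the dirty mask bit by bit and flush each dirty guest register to the CPU context. */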
4126 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
4127 uint32_t idxGstReg = 0;
4128
4129 do
4130 {
4131 if (bmGstRegShadowDirty & 0x1)
4132 {
4133 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
4134 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4135 }
4136 idxGstReg++;
4137 bmGstRegShadowDirty >>= 1;
4138 } while (bmGstRegShadowDirty);
4139 }
4140
4141 return off;
4142}
4143
4144
4145/**
4146 * Flush all shadowed guest registers marked as dirty for the given host register.
4147 *
4148 * @returns New code buffer offset.
4149 * @param pReNative The native recompile state.
4150 * @param off Current code buffer position.
4151 * @param idxHstReg The host register.
4152 *
4153 * @note This doesn't do any unshadowing of guest registers from the host register.
4154 */
4155DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
4156{
4157 /* We need to flush any pending guest register writes this host register shadows. */
4158 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4159 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
4160 {
4161# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4162 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4163 iemNaitveDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
4164# endif
4165
4166 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
4167 uint32_t idxGstReg = 0;
4168 do
4169 {
4170 if (bmGstRegShadowDirty & 0x1)
4171 {
4172 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
4173 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4174 }
4175 idxGstReg++;
4176 bmGstRegShadowDirty >>= 1;
4177 } while (bmGstRegShadowDirty);
4178 }
4179
4180 return off;
4181}
4182#endif
4183
4184
4185/**
4186 * Locate a register, possibly freeing one up.
4187 *
4188 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4189 * failed.
4190 *
4191 * @returns Host register number on success. Returns UINT8_MAX if no registers
4192 * found, the caller is supposed to deal with this and raise an
4193 * allocation type specific status code (if desired).
4194 *
4195 * @throws VBox status code if we run into trouble spilling a variable or
4196 * recording debug info. Does NOT throw anything if we're out of
4197 * registers, though.
4198 */
4199static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4200 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
4201{
4202 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4203 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4204 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4205
4206 /*
4207 * Try a freed register that's shadowing a guest register.
4208 */
4209 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4210 if (fRegs)
4211 {
4212 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4213
4214#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4215 /*
4216 * When we have liveness information, we use it to kick out all shadowed
4217 * guest registers that will not be needed any more in this TB. If we're
4218 * lucky, this may prevent us from ending up here again.
4219 *
4220 * Note! We must consider the previous entry here so we don't free
4221 * anything that the current threaded function requires (current
4222 * entry is produced by the next threaded function).
4223 */
4224 uint32_t const idxCurCall = pReNative->idxCurCall;
4225 if (idxCurCall > 0)
4226 {
4227 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4228
4229# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4230 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4231 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4232 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or XCPT_OR_CALL state */
4233# else
4234 /* Construct a mask of the registers not in the read or write state.
4235 Note! We could skip writes, if they aren't from us, as this is just
4236 a hack to prevent trashing registers that have just been written
4237 or will be written when we retire the current instruction. */
4238 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4239 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4240 & IEMLIVENESSBIT_MASK;
4241# endif
4242 /* Merge EFLAGS. */
4243 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
4244 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
4245 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
4246 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
4247 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
4248
4249 /* If it matches any shadowed registers. */
4250 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4251 {
4252#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4253 /* Writeback any dirty shadow registers we are about to unshadow. */
4254 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
4255#endif
4256
4257 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
4258 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4259 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4260
4261 /* See if we've got any unshadowed registers we can return now. */
4262 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4263 if (fUnshadowedRegs)
4264 {
4265 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4266 return (fPreferVolatile
4267 ? ASMBitFirstSetU32(fUnshadowedRegs)
4268 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4269 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4270 - 1;
4271 }
4272 }
4273 }
4274#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4275
4276 unsigned const idxReg = (fPreferVolatile
4277 ? ASMBitFirstSetU32(fRegs)
4278 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4279 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4280 - 1;
4281
4282 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4283 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4284 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4285 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4286
4287#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4288 /* We need to flush any pending guest register writes this host register shadows. */
4289 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
4290#endif
4291
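        /* Drop all guest register shadowing from the chosen register before handing it out. */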
4292 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4293 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4294 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4295 return idxReg;
4296 }
4297
4298 /*
4299 * Try free up a variable that's in a register.
4300 *
4301 * We do two rounds here, first evacuating variables we don't need to be
4302 * saved on the stack, then in the second round move things to the stack.
4303 */
4304 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4305 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4306 {
4307 uint32_t fVars = pReNative->Core.bmVars;
4308 while (fVars)
4309 {
4310 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4311 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4312#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4313 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
4314 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Must clear the bit or we'd loop forever on this variable. */
4315#endif
4316
4317 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4318 && (RT_BIT_32(idxReg) & fRegMask)
4319 && ( iLoop == 0
4320 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4321 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4322 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4323 {
4324 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4325 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4326 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4327 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4328 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4329 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4330#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4331 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4332#endif
4333
4334 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4335 {
4336 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4337 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4338 }
4339
4340 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4341 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4342
4343 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4344 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4345 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4346 return idxReg;
4347 }
4348 fVars &= ~RT_BIT_32(idxVar);
4349 }
4350 }
4351
4352 return UINT8_MAX;
4353}
4354
4355
4356/**
4357 * Reassigns a variable to a different register specified by the caller.
4358 *
4359 * @returns The new code buffer position.
4360 * @param pReNative The native recompile state.
4361 * @param off The current code buffer position.
4362 * @param idxVar The variable index.
4363 * @param idxRegOld The old host register number.
4364 * @param idxRegNew The new host register number.
4365 * @param pszCaller The caller for logging.
4366 */
4367static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4368 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4369{
4370 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4371 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4372#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4373 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4374#endif
4375 RT_NOREF(pszCaller);
4376
4377 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4378
4379 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4380#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4381 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4382#endif
4383 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4384 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4385 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4386
4387 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4388 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4389 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
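    /* Transfer any guest register shadowing from the old host register to the new one. */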
4390 if (fGstRegShadows)
4391 {
4392 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4393 | RT_BIT_32(idxRegNew);
4394 while (fGstRegShadows)
4395 {
4396 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4397 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4398
4399 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4400 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4401 }
4402 }
4403
4404 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4405 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4406 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4407 return off;
4408}
4409
4410
4411/**
4412 * Moves a variable to a different register or spills it onto the stack.
4413 *
4414 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4415 * kinds can easily be recreated if needed later.
4416 *
4417 * @returns The new code buffer position.
4418 * @param pReNative The native recompile state.
4419 * @param off The current code buffer position.
4420 * @param idxVar The variable index.
4421 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4422 * call-volatile registers.
4423 */
4424DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4425 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4426{
4427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4428 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4429 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4430 Assert(!pVar->fRegAcquired);
4431
4432 uint8_t const idxRegOld = pVar->idxReg;
4433 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4434 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4435 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4436 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4437 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4438 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4439 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4440 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4441#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4442 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4443#endif
4444
4445
4446 /** @todo Add statistics on this.*/
4447 /** @todo Implement basic variable liveness analysis (python) so variables
4448 * can be freed immediately once no longer used. Without it we risk tying up
4449 * registers and stack slots for dead variables.
4450 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4451
4452 /*
4453 * First try move it to a different register, as that's cheaper.
4454 */
4455 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4456 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4457 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4458 if (fRegs)
4459 {
4460 /* Avoid using shadow registers, if possible. */
4461 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4462 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4463 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4464 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4465 }
4466
4467 /*
4468 * Otherwise we must spill the register onto the stack.
4469 */
4470 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4471 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4472 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4473 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4474
4475 pVar->idxReg = UINT8_MAX;
4476 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4477 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4478 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4479 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4480 return off;
4481}
4482
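/*
 * Illustrative usage sketch (editorial addition; the call site below is hypothetical and not
 * part of the recompiler): evicting a stack variable from a call-volatile host register
 * before emitting a helper call, mirroring how the function is used further down in
 * iemNativeRegMoveAndFreeAndFlushAtCall and iemNativeRegAllocArgs.
 *
 *      // idxVar refers to a kIemNativeVarKind_Stack variable currently living in a
 *      // call-volatile GPR; afterwards it sits in a non-volatile GPR or in its stack slot.
 *      off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
 */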
4483
4484/**
4485 * Allocates a temporary host general purpose register.
4486 *
4487 * This may emit code to save register content onto the stack in order to free
4488 * up a register.
4489 *
4490 * @returns The host register number; throws VBox status code on failure,
4491 * so no need to check the return value.
4492 * @param pReNative The native recompile state.
4493 * @param poff Pointer to the variable with the code buffer position.
4494 * This will be update if we need to move a variable from
4495 * register to stack in order to satisfy the request.
4496 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4497 * registers (@c true, default) or the other way around
4498 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4499 */
4500DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4501{
4502 /*
4503 * Try to find a completely unused register, preferably a call-volatile one.
4504 */
4505 uint8_t idxReg;
4506 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4507 & ~pReNative->Core.bmHstRegsWithGstShadow
4508 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4509 if (fRegs)
4510 {
4511 if (fPreferVolatile)
4512 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4513 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4514 else
4515 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4516 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4517 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4518 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4519 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4520 }
4521 else
4522 {
4523 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4524 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4525 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4526 }
4527 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4528}
4529
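/*
 * Illustrative usage sketch (editorial addition; iemNativeEmitExampleScratch is a made-up
 * emitter): allocate a scratch GPR, emit code that uses it, and free it again.  Freeing
 * does not disturb any guest shadow copies the register may carry.
 *
 *      static uint32_t iemNativeEmitExampleScratch(PIEMRECOMPILERSTATE pReNative, uint32_t off)
 *      {
 *          uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off); // may move a variable to the stack
 *          off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1000));
 *          // ... emit whatever needs the scratch register here ...
 *          iemNativeRegFreeTmp(pReNative, idxTmpReg);
 *          return off;
 *      }
 */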
4530
4531/**
4532 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4533 * registers.
4534 *
4535 * @returns The host register number; throws VBox status code on failure,
4536 * so no need to check the return value.
4537 * @param pReNative The native recompile state.
4538 * @param poff Pointer to the variable with the code buffer position.
4539 * This will be updated if we need to move a variable from
4540 * register to stack in order to satisfy the request.
4541 * @param fRegMask Mask of acceptable registers.
4542 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4543 * registers (@c true, default) or the other way around
4544 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4545 */
4546DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4547 bool fPreferVolatile /*= true*/)
4548{
4549 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4550 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4551
4552 /*
4553 * Try find a completely unused register, preferably a call-volatile one.
4554 */
4555 uint8_t idxReg;
4556 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4557 & ~pReNative->Core.bmHstRegsWithGstShadow
4558 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4559 & fRegMask;
4560 if (fRegs)
4561 {
4562 if (fPreferVolatile)
4563 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4564 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4565 else
4566 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4567 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4568 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4569 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4570 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4571 }
4572 else
4573 {
4574 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4575 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4576 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4577 }
4578 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4579}
4580
4581
4582/**
4583 * Allocates a temporary register for loading an immediate value into.
4584 *
4585 * This will emit code to load the immediate, unless there happens to be an
4586 * unused register with the value already loaded.
4587 *
4588 * The caller must not modify the returned register; it is to be considered
4589 * read-only. Free it using iemNativeRegFreeTmpImm.
4590 *
4591 * @returns The host register number; throws VBox status code on failure, so no
4592 * need to check the return value.
4593 * @param pReNative The native recompile state.
4594 * @param poff Pointer to the variable with the code buffer position.
4595 * @param uImm The immediate value that the register must hold upon
4596 * return.
4597 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4598 * registers (@c true, default) or the other way around
4599 * (@c false).
4600 *
4601 * @note Reusing immediate values has not been implemented yet.
4602 */
4603DECL_HIDDEN_THROW(uint8_t)
4604iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4605{
4606 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4607 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4608 return idxReg;
4609}
4610
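/*
 * Illustrative usage sketch (editorial addition, hypothetical call site): materialize a
 * constant in a read-only scratch register and release it with the matching free function,
 * so that a future implementation of immediate value reuse keeps working.
 *
 *      uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      // ... emit code that only reads idxImmReg (e.g. as a comparison operand) ...
 *      iemNativeRegFreeTmpImm(pReNative, idxImmReg);
 */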
4611
4612/**
4613 * Allocates a temporary host general purpose register for keeping a guest
4614 * register value.
4615 *
4616 * If a host register already holds the guest register value, it will be reused;
4617 * otherwise code is emitted to load it. Code may also
4618 * be emitted if we have to free up a register to satisfy the request.
4619 *
4620 * @returns The host register number; throws VBox status code on failure, so no
4621 * need to check the return value.
4622 * @param pReNative The native recompile state.
4623 * @param poff Pointer to the variable with the code buffer
4624 * position. This will be updated if we need to move a
4625 * variable from register to stack in order to satisfy
4626 * the request.
4627 * @param enmGstReg The guest register that is to be updated.
4628 * @param enmIntendedUse How the caller will be using the host register.
4629 * @param fNoVolatileRegs Set if no volatile register is allowed, clear if any
4630 * register is okay (default). The ASSUMPTION here is
4631 * that the caller has already flushed all volatile
4632 * registers, so this is only applied if we allocate a
4633 * new register.
4634 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4635 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4636 */
4637DECL_HIDDEN_THROW(uint8_t)
4638iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4639 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4640 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4641{
4642 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4643#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4644 AssertMsg( fSkipLivenessAssert
4645 || pReNative->idxCurCall == 0
4646 || enmGstReg == kIemNativeGstReg_Pc
4647 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4648 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4649 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4650 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4651 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4652 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4653#endif
4654 RT_NOREF(fSkipLivenessAssert);
4655#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4656 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4657#endif
4658 uint32_t const fRegMask = !fNoVolatileRegs
4659 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4660 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4661
4662 /*
4663 * First check if the guest register value is already in a host register.
4664 */
4665 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4666 {
4667 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4668 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4669 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4670 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4671
4672 /* It's not supposed to be allocated... */
4673 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4674 {
4675 /*
4676 * If the register will trash the guest shadow copy, try find a
4677 * completely unused register we can use instead. If that fails,
4678 * we need to disassociate the host reg from the guest reg.
4679 */
4680 /** @todo would be nice to know if preserving the register is in any way helpful. */
4681 /* If the purpose is calculations, try to duplicate the register value as
4682 we'll be clobbering the shadow. */
4683 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4684 && ( ~pReNative->Core.bmHstRegs
4685 & ~pReNative->Core.bmHstRegsWithGstShadow
4686 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4687 {
4688 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4689
4690 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4691
4692 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4693 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4694 g_apszIemNativeHstRegNames[idxRegNew]));
4695 idxReg = idxRegNew;
4696 }
4697 /* If the current register matches the restrictions, go ahead and allocate
4698 it for the caller. */
4699 else if (fRegMask & RT_BIT_32(idxReg))
4700 {
4701 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4702 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4703 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4704 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4705 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4706 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4707 else
4708 {
4709 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4710 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4711 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4712 }
4713 }
4714 /* Otherwise, allocate a register that satisfies the caller and transfer
4715 the shadowing if compatible with the intended use. (This basically
4716 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4717 else
4718 {
4719 Assert(fNoVolatileRegs);
4720 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4721 !fNoVolatileRegs
4722 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4723 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4724 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4725 {
4726 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4727 Log12(("iemNativeRegAllocTmpForGuestReg: Transfering %s to %s for guest %s %s\n",
4728 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4729 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4730 }
4731 else
4732 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4733 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4734 g_apszIemNativeHstRegNames[idxRegNew]));
4735 idxReg = idxRegNew;
4736 }
4737 }
4738 else
4739 {
4740 /*
4741 * Oops. Shadowed guest register already allocated!
4742 *
4743 * Allocate a new register, copy the value and, if updating, move the
4744 * guest shadow copy assignment to the new register.
4745 */
4746 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4747 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4748 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4749 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4750
4751 /** @todo share register for readonly access. */
4752 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4753 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4754
4755 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4756 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4757
4758 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4759 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4760 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4761 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4762 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4763 else
4764 {
4765 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4766 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4767 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4768 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4769 }
4770 idxReg = idxRegNew;
4771 }
4772 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4773
4774#ifdef VBOX_STRICT
4775 /* Strict builds: Check that the value is correct. */
4776 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4777#endif
4778
4779#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4780 /** @todo r=aeichner Implement for registers other than GPR as well. */
4781 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4782 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
4783 && enmGstReg >= kIemNativeGstReg_GprFirst
4784 && enmGstReg <= kIemNativeGstReg_GprLast
4785 )
4786 {
4787# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4788 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
4789 iemNaitveDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
4790# endif
4791
4792 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
4793 }
4794#endif
4795
4796 return idxReg;
4797 }
4798
4799 /*
4800 * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
4801 */
4802 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4803
4804 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4805 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4806
4807 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4808 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4809 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4810 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4811
4812#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4813 /** @todo r=aeichner Implement for registers other than GPR as well. */
4814 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4815 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
4816 && enmGstReg >= kIemNativeGstReg_GprFirst
4817 && enmGstReg <= kIemNativeGstReg_GprLast
4818 )
4819 {
4820# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4821 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
4822 iemNaitveDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
4823# endif
4824
4825 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
4826 }
4827#endif
4828
4829 return idxRegNew;
4830}
4831
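/*
 * Illustrative usage sketch (editorial addition; idxGstGpr and the emitted operation are
 * hypothetical): fetch a guest GPR for update.  Any existing shadow copy is reused or
 * loaded, and with IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK the guest register is marked
 * dirty so the write-back happens later.
 *
 *      uint8_t const idxGstGpr = 0; // hypothetical guest GPR index (RAX)
 *      uint8_t const idxHstReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + idxGstGpr),
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      // ... emit the instruction(s) that modify idxHstReg ...
 *      iemNativeRegFreeTmp(pReNative, idxHstReg);
 */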
4832
4833/**
4834 * Allocates a temporary host general purpose register that already holds the
4835 * given guest register value.
4836 *
4837 * The use case for this function is places where the shadowing state cannot be
4838 * modified due to branching and such. This will fail if we don't have a
4839 * current shadow copy handy or if it's incompatible. The only code that will
4840 * be emitted here is value checking code in strict builds.
4841 *
4842 * The intended use can only be readonly!
4843 *
4844 * @returns The host register number, UINT8_MAX if not present.
4845 * @param pReNative The native recompile state.
4846 * @param poff Pointer to the instruction buffer offset.
4847 * Will be updated in strict builds if a register is
4848 * found.
4849 * @param enmGstReg The guest register that is to be read.
4850 * @note In strict builds, this may throw instruction buffer growth failures.
4851 * Non-strict builds will not throw anything.
4852 * @sa iemNativeRegAllocTmpForGuestReg
4853 */
4854DECL_HIDDEN_THROW(uint8_t)
4855iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4856{
4857 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4858#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4859 AssertMsg( pReNative->idxCurCall == 0
4860 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4861 || enmGstReg == kIemNativeGstReg_Pc,
4862 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4863#endif
4864
4865 /*
4866 * First check if the guest register value is already in a host register.
4867 */
4868 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4869 {
4870 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4871 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4872 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4873 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4874
4875 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4876 {
4877 /*
4878 * We only do read-only use here, so this is easy compared to the other
4879 * variant of this code.
4880 */
4881 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4882 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4883 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4884 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4885 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4886
4887#ifdef VBOX_STRICT
4888 /* Strict builds: Check that the value is correct. */
4889 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4890#else
4891 RT_NOREF(poff);
4892#endif
4893 return idxReg;
4894 }
4895 }
4896
4897 return UINT8_MAX;
4898}
4899
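/*
 * Illustrative usage sketch (editorial addition, hypothetical call site): peek at an
 * existing shadow copy without touching the shadowing state, falling back to some other
 * strategy when the guest value is not currently in a host register.
 *
 *      uint8_t const idxHstReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxHstReg != UINT8_MAX)
 *      {
 *          // ... emit code that only reads idxHstReg ...
 *          iemNativeRegFreeTmp(pReNative, idxHstReg);
 *      }
 *      else
 *      {
 *          // ... load the value from the guest context instead ...
 *      }
 */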
4900
4901/**
4902 * Allocates argument registers for a function call.
4903 *
4904 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4905 * need to check the return value.
4906 * @param pReNative The native recompile state.
4907 * @param off The current code buffer offset.
4908 * @param cArgs The number of arguments the function call takes.
4909 */
4910DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4911{
4912 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4913 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4914 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4915 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4916
4917 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4918 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4919 else if (cArgs == 0)
4920 return off;
4921
4922 /*
4923 * Are we lucky and all the registers are free and not shadowing anything?
4924 */
4925 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4926 for (uint32_t i = 0; i < cArgs; i++)
4927 {
4928 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4929 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4930 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4931 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4932 }
4933 /*
4934 * Okay, not lucky so we have to free up the registers.
4935 */
4936 else
4937 for (uint32_t i = 0; i < cArgs; i++)
4938 {
4939 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4940 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4941 {
4942 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4943 {
4944 case kIemNativeWhat_Var:
4945 {
4946 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4947 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4948 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4949 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4950 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4951#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4952 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4953#endif
4954
4955 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4956 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4957 else
4958 {
4959 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4960 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4961 }
4962 break;
4963 }
4964
4965 case kIemNativeWhat_Tmp:
4966 case kIemNativeWhat_Arg:
4967 case kIemNativeWhat_rc:
4968 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4969 default:
4970 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4971 }
4972
4973 }
4974 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4975 {
4976 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4977 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4978 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4979#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4980 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4981#endif
4982 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4983 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4984 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4985 }
4986 else
4987 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4988 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4989 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4990 }
4991 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4992 return off;
4993}
4994
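/*
 * Illustrative usage sketch (editorial addition, hypothetical call site): reserve the
 * first few call argument registers before loading the argument values and emitting the
 * call itself (argument loading and the call emitter are not shown here).
 *
 *      uint8_t const cArgs = 3; // hypothetical argument count
 *      off = iemNativeRegAllocArgs(pReNative, off, cArgs);
 *      // ... load g_aidxIemNativeCallRegs[0..cArgs-1] with the argument values and emit the call ...
 */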
4995
4996DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4997
4998
4999#if 0
5000/**
5001 * Frees a register assignment of any type.
5002 *
5003 * @param pReNative The native recompile state.
5004 * @param idxHstReg The register to free.
5005 *
5006 * @note Does not update variables.
5007 */
5008DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
5009{
5010 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5011 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
5012 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
5013 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
5014 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
5015 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
5016 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
5017 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
5018 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
5019 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
5020 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5021 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5022 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
5023 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5024
5025 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5026 /* no flushing, right:
5027 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5028 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5029 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5030 */
5031}
5032#endif
5033
5034
5035/**
5036 * Frees a temporary register.
5037 *
5038 * Any shadow copies of guest registers assigned to the host register will not
5039 * be flushed by this operation.
5040 */
5041DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
5042{
5043 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
5044 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
5045 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5046 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
5047 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5048}
5049
5050
5051/**
5052 * Frees a temporary immediate register.
5053 *
5054 * It is assumed that the caller has not modified the register, so it still holds
5055 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
5056 */
5057DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
5058{
5059 iemNativeRegFreeTmp(pReNative, idxHstReg);
5060}
5061
5062
5063/**
5064 * Frees a register assigned to a variable.
5065 *
5066 * The register will be disassociated from the variable.
5067 */
5068DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
5069{
5070 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
5071 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
5072 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
5073 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5074 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
5075#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5076 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5077#endif
5078
5079 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
5080 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5081 if (!fFlushShadows)
5082 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
5083 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
5084 else
5085 {
5086 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5087 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5088#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5089 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
5090#endif
5091 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5092 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
5093 uint64_t fGstRegShadows = fGstRegShadowsOld;
5094 while (fGstRegShadows)
5095 {
5096 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5097 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5098
5099 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
5100 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
5101 }
5102 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
5103 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
5104 }
5105}
5106
5107
5108#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5109# ifdef LOG_ENABLED
5110/** Host CPU SIMD register names. */
5111DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5112{
5113# ifdef RT_ARCH_AMD64
5114 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5115# elif RT_ARCH_ARM64
5116 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5117 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5118# else
5119# error "port me"
5120# endif
5121};
5122# endif
5123
5124
5125/**
5126 * Frees a SIMD register assigned to a variable.
5127 *
5128 * The register will be disassociated from the variable.
5129 */
5130DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
5131{
5132 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
5133 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
5134 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
5135 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5136 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
5137 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5138
5139 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
5140 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
5141 if (!fFlushShadows)
5142 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
5143 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
5144 else
5145 {
5146 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5147 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
5148 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5149 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
5150 uint64_t fGstRegShadows = fGstRegShadowsOld;
5151 while (fGstRegShadows)
5152 {
5153 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5154 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5155
5156 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
5157 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
5158 }
5159 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
5160 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
5161 }
5162}
5163
5164
5165/**
5166 * Reassigns a variable to a different SIMD register specified by the caller.
5167 *
5168 * @returns The new code buffer position.
5169 * @param pReNative The native recompile state.
5170 * @param off The current code buffer position.
5171 * @param idxVar The variable index.
5172 * @param idxRegOld The old host register number.
5173 * @param idxRegNew The new host register number.
5174 * @param pszCaller The caller for logging.
5175 */
5176static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
5177 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
5178{
5179 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5180 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
5181 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5182 RT_NOREF(pszCaller);
5183
5184 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
5185
5186 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
5187 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5188 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5189
5190 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
5191 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
5193
5194 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
5195 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
5196 else
5197 {
5198 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
5199 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
5200 }
5201
5202 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
5203 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
5204 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
5205 if (fGstRegShadows)
5206 {
5207 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
5208 | RT_BIT_32(idxRegNew);
5209 while (fGstRegShadows)
5210 {
5211 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5212 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5213
5214 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
5215 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
5216 }
5217 }
5218
5219 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
5220 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
5221 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
5222 return off;
5223}
5224
5225
5226/**
5227 * Moves a variable to a different register or spills it onto the stack.
5228 *
5229 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
5230 * kinds can easily be recreated if needed later.
5231 *
5232 * @returns The new code buffer position.
5233 * @param pReNative The native recompile state.
5234 * @param off The current code buffer position.
5235 * @param idxVar The variable index.
5236 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
5237 * call-volatile registers.
5238 */
5239DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
5240 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
5241{
5242 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5243 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5244 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
5245 Assert(!pVar->fRegAcquired);
5246 Assert(!pVar->fSimdReg);
5247
5248 uint8_t const idxRegOld = pVar->idxReg;
5249 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5250 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
5251 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
5252 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
5253 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
5254 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5255 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
5256 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5257 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5258 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5259
5260 /** @todo Add statistics on this.*/
5261 /** @todo Implement basic variable liveness analysis (python) so variables
5262 * can be freed immediately once they are no longer used. Otherwise we risk
5263 * trashing registers and stack slots for dead variables.
5264 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
5265
5266 /*
5267 * First try move it to a different register, as that's cheaper.
5268 */
5269 fForbiddenRegs |= RT_BIT_32(idxRegOld);
5270 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
5271 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
5272 if (fRegs)
5273 {
5274 /* Avoid using shadow registers, if possible. */
5275 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
5276 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
5277 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
5278 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
5279 }
5280
5281 /*
5282 * Otherwise we must spill the register onto the stack.
5283 */
5284 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
5285 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
5286 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
5287
5288 if (pVar->cbVar == sizeof(RTUINT128U))
5289 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
5290 else
5291 {
5292 Assert(pVar->cbVar == sizeof(RTUINT256U));
5293 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
5294 }
5295
5296 pVar->idxReg = UINT8_MAX;
5297 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
5298 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
5299 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
5300 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
5301 return off;
5302}
5303
5304
5305/**
5306 * Called right before emitting a call instruction to move anything important
5307 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
5308 * optionally freeing argument variables.
5309 *
5310 * @returns New code buffer offset, UINT32_MAX on failure.
5311 * @param pReNative The native recompile state.
5312 * @param off The code buffer offset.
5313 * @param cArgs The number of arguments the function call takes.
5314 * It is presumed that the host register part of these has
5315 * been allocated as such already and won't need moving,
5316 * just freeing.
5317 * @param fKeepVars Mask of variables that should keep their register
5318 * assignments. Caller must take care to handle these.
5319 */
5320DECL_HIDDEN_THROW(uint32_t)
5321iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
5322{
5323 Assert(!cArgs); RT_NOREF(cArgs);
5324
5325 /* fKeepVars will reduce this mask. */
5326 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5327
5328 /*
5329 * Move anything important out of volatile registers.
5330 */
5331 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5332#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
5333 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
5334#endif
5335 ;
5336
5337 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
5338 if (!fSimdRegsToMove)
5339 { /* likely */ }
5340 else
5341 {
5342 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
5343 while (fSimdRegsToMove != 0)
5344 {
5345 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
5346 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
5347
5348 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
5349 {
5350 case kIemNativeWhat_Var:
5351 {
5352 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
5353 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5354 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5355 Assert(pVar->idxReg == idxSimdReg);
5356 Assert(pVar->fSimdReg);
5357 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
5358 {
5359 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
5360 idxVar, pVar->enmKind, pVar->idxReg));
5361 if (pVar->enmKind != kIemNativeVarKind_Stack)
5362 pVar->idxReg = UINT8_MAX;
5363 else
5364 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
5365 }
5366 else
5367 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
5368 continue;
5369 }
5370
5371 case kIemNativeWhat_Arg:
5372 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
5373 continue;
5374
5375 case kIemNativeWhat_rc:
5376 case kIemNativeWhat_Tmp:
5377 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
5378 continue;
5379
5380 case kIemNativeWhat_FixedReserved:
5381#ifdef RT_ARCH_ARM64
5382 continue; /* On ARM the upper half of the virtual 256-bit register. */
5383#endif
5384
5385 case kIemNativeWhat_FixedTmp:
5386 case kIemNativeWhat_pVCpuFixed:
5387 case kIemNativeWhat_pCtxFixed:
5388 case kIemNativeWhat_PcShadow:
5389 case kIemNativeWhat_Invalid:
5390 case kIemNativeWhat_End:
5391 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5392 }
5393 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5394 }
5395 }
5396
5397 /*
5398 * Do the actual freeing.
5399 */
5400 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
5401 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
5402 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
5403 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
5404
5405 /* If there are guest register shadows in any call-volatile register, we
5406 have to clear the corresponding guest register masks for each register. */
5407 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
5408 if (fHstSimdRegsWithGstShadow)
5409 {
5410 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5411 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
5412 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
5413 do
5414 {
5415 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
5416 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
5417
5418 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
5419 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5420 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
5421
5422 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
5423 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5424 } while (fHstSimdRegsWithGstShadow != 0);
5425 }
5426
5427 return off;
5428}
5429#endif
5430
5431
5432/**
5433 * Called right before emitting a call instruction to move anything important
5434 * out of call-volatile registers, free and flush the call-volatile registers,
5435 * optionally freeing argument variables.
5436 *
5437 * @returns New code buffer offset, UINT32_MAX on failure.
5438 * @param pReNative The native recompile state.
5439 * @param off The code buffer offset.
5440 * @param cArgs The number of arguments the function call takes.
5441 * It is presumed that the host register part of these has
5442 * been allocated as such already and won't need moving,
5443 * just freeing.
5444 * @param fKeepVars Mask of variables that should keep their register
5445 * assignments. Caller must take care to handle these.
5446 */
5447DECL_HIDDEN_THROW(uint32_t)
5448iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
5449{
5450 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5451
5452 /* fKeepVars will reduce this mask. */
5453 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5454
5455 /*
5456 * Move anything important out of volatile registers.
5457 */
5458 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
5459 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
5460 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
5461#ifdef IEMNATIVE_REG_FIXED_TMP0
5462 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
5463#endif
5464#ifdef IEMNATIVE_REG_FIXED_TMP1
5465 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
5466#endif
5467#ifdef IEMNATIVE_REG_FIXED_PC_DBG
5468 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
5469#endif
5470 & ~g_afIemNativeCallRegs[cArgs];
5471
5472 fRegsToMove &= pReNative->Core.bmHstRegs;
5473 if (!fRegsToMove)
5474 { /* likely */ }
5475 else
5476 {
5477 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
5478 while (fRegsToMove != 0)
5479 {
5480 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
5481 fRegsToMove &= ~RT_BIT_32(idxReg);
5482
5483 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
5484 {
5485 case kIemNativeWhat_Var:
5486 {
5487 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
5488 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5489 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5490 Assert(pVar->idxReg == idxReg);
5491#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5492 Assert(!pVar->fSimdReg);
5493#endif
5494 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
5495 {
5496 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
5497 idxVar, pVar->enmKind, pVar->idxReg));
5498 if (pVar->enmKind != kIemNativeVarKind_Stack)
5499 pVar->idxReg = UINT8_MAX;
5500 else
5501 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5502 }
5503 else
5504 fRegsToFree &= ~RT_BIT_32(idxReg);
5505 continue;
5506 }
5507
5508 case kIemNativeWhat_Arg:
5509 AssertMsgFailed(("What?!?: %u\n", idxReg));
5510 continue;
5511
5512 case kIemNativeWhat_rc:
5513 case kIemNativeWhat_Tmp:
5514 AssertMsgFailed(("Missing free: %u\n", idxReg));
5515 continue;
5516
5517 case kIemNativeWhat_FixedTmp:
5518 case kIemNativeWhat_pVCpuFixed:
5519 case kIemNativeWhat_pCtxFixed:
5520 case kIemNativeWhat_PcShadow:
5521 case kIemNativeWhat_FixedReserved:
5522 case kIemNativeWhat_Invalid:
5523 case kIemNativeWhat_End:
5524 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5525 }
5526 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5527 }
5528 }
5529
5530 /*
5531 * Do the actual freeing.
5532 */
5533 if (pReNative->Core.bmHstRegs & fRegsToFree)
5534 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
5535 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
5536 pReNative->Core.bmHstRegs &= ~fRegsToFree;
5537
5538 /* If there are guest register shadows in any call-volatile register, we
5539 have to clear the corresponding guest register masks for each register. */
5540 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
5541 if (fHstRegsWithGstShadow)
5542 {
5543 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5544 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
5545 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
5546 do
5547 {
5548 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
5549 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5550
5551 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
5552#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5553 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
5554#endif
5555 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
5556 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
5557 } while (fHstRegsWithGstShadow != 0);
5558 }
5559
5560#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5561 /* Now for the SIMD registers, no argument support for now. */
5562 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
5563#endif
5564
5565 return off;
5566}
5567
5568
5569/**
5570 * Flushes a set of guest register shadow copies.
5571 *
5572 * This is usually done after calling a threaded function or a C-implementation
5573 * of an instruction.
5574 *
5575 * @param pReNative The native recompile state.
5576 * @param fGstRegs Set of guest registers to flush.
5577 */
5578DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
5579{
5580 /*
5581 * Reduce the mask by what's currently shadowed
5582 */
5583 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
5584 fGstRegs &= bmGstRegShadowsOld;
5585 if (fGstRegs)
5586 {
5587 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
5588 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
5589 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
5590 if (bmGstRegShadowsNew)
5591 {
5592 /*
5593 * Partial.
5594 */
5595 do
5596 {
5597 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5598 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5599 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5600 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5601 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5602#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5603 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
5604#endif
5605
5606 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5607 fGstRegs &= ~fInThisHstReg;
5608 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5609 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5610 if (!fGstRegShadowsNew)
5611 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5612 } while (fGstRegs != 0);
5613 }
5614 else
5615 {
5616 /*
5617 * Clear all.
5618 */
5619 do
5620 {
5621 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5622 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5623 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5624 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5625 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5626#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5627 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
5628#endif
5629
5630 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5631 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5632 } while (fGstRegs != 0);
5633 pReNative->Core.bmHstRegsWithGstShadow = 0;
5634 }
5635 }
5636}
5637
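/*
 * Illustrative usage sketch (editorial addition, hypothetical mask): after a call that may
 * have changed RIP and the GPRs behind our back, drop the now stale shadow copies so the
 * next access reloads them from the guest context.
 *
 *      uint64_t fGstRegs = RT_BIT_64(kIemNativeGstReg_Pc);
 *      for (unsigned i = kIemNativeGstReg_GprFirst; i <= kIemNativeGstReg_GprLast; i++)
 *          fGstRegs |= RT_BIT_64(i);
 *      iemNativeRegFlushGuestShadows(pReNative, fGstRegs);
 */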
5638
5639/**
5640 * Flushes guest register shadow copies held by a set of host registers.
5641 *
5642 * This is used with the TLB lookup code for ensuring that we don't carry on
5643 * with any guest shadows in volatile registers, as these will get corrupted by
5644 * a TLB miss.
5645 *
5646 * @param pReNative The native recompile state.
5647 * @param fHstRegs Set of host registers to flush guest shadows for.
5648 */
5649DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5650{
5651 /*
5652 * Reduce the mask by what's currently shadowed.
5653 */
5654 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5655 fHstRegs &= bmHstRegsWithGstShadowOld;
5656 if (fHstRegs)
5657 {
5658 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5659 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5660 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5661 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5662 if (bmHstRegsWithGstShadowNew)
5663 {
5664 /*
5665 * Partial (likely).
5666 */
5667 uint64_t fGstShadows = 0;
5668 do
5669 {
5670 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5671 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5672 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5673 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5674#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5675 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5676#endif
5677
5678 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5679 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5680 fHstRegs &= ~RT_BIT_32(idxHstReg);
5681 } while (fHstRegs != 0);
5682 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5683 }
5684 else
5685 {
5686 /*
5687 * Clear all.
5688 */
5689 do
5690 {
5691 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5692 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5693 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5694 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5695#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5696 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5697#endif
5698
5699 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5700 fHstRegs &= ~RT_BIT_32(idxHstReg);
5701 } while (fHstRegs != 0);
5702 pReNative->Core.bmGstRegShadows = 0;
5703 }
5704 }
5705}
5706
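/*
 * Illustrative usage sketch (editorial addition): before branching into a TLB miss path
 * that calls a helper, make sure no guest shadows are left book-kept in call-volatile host
 * registers, since the helper will clobber their contents.
 *
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */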
5707
5708/**
5709 * Restores guest shadow copies in volatile registers.
5710 *
5711 * This is used after calling a helper function (think TLB miss) to restore the
5712 * register state of volatile registers.
5713 *
5714 * @param pReNative The native recompile state.
5715 * @param off The code buffer offset.
5716 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5717 * be active (allocated) w/o asserting. Hack.
5718 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5719 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5720 */
5721DECL_HIDDEN_THROW(uint32_t)
5722iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5723{
5724 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5725 if (fHstRegs)
5726 {
5727 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5728 do
5729 {
5730 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5731
5732 /* It's not fatal if a register is active holding a variable that is
5733 shadowing a guest register, ASSUMING all pending guest register
5734 writes were flushed prior to the helper call. However, we'll be
5735 emitting duplicate restores, so it wastes code space. */
5736 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5737 RT_NOREF(fHstRegsActiveShadows);
5738
5739 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5740#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5741 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
5742#endif
5743 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5744 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5745 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5746
5747 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5748 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5749
5750 fHstRegs &= ~RT_BIT_32(idxHstReg);
5751 } while (fHstRegs != 0);
5752 }
5753 return off;
5754}
5755
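/*
 * Illustrative usage sketch (editorial addition, hypothetical TLB-miss style code path):
 * after the helper call returns, reload the guest values that are still book-kept as
 * shadowed in call-volatile registers but whose register contents the helper has clobbered.
 *
 *      // ... emit the helper call; pending guest register writes were flushed before it ...
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 */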
5756
5757
5758
5759/*********************************************************************************************************************************
5760* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5761*********************************************************************************************************************************/
5762#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5763
5764/**
5765 * Info about shadowed guest SIMD register values.
5766 * @see IEMNATIVEGSTSIMDREG
5767 */
5768static struct
5769{
5770 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5771 uint32_t offXmm;
5772 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5773 uint32_t offYmm;
5774 /** Name (for logging). */
5775 const char *pszName;
5776} const g_aGstSimdShadowInfo[] =
5777{
5778#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5779 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5780 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5781 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5782 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5783 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5784 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5785 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5786 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5787 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5788 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5789 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5790 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5791 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5792 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5793 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5794 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5795 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5796#undef CPUMCTX_OFF_AND_SIZE
5797};
5798AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
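/* Illustrative note: the table above lets the emitters below resolve CPUMCTX offsets
 * without repeating the RT_UOFFSETOF calculations, e.g. (sketch):
 *
 *     uint32_t const offXmm3 = g_aGstSimdShadowInfo[kIemNativeGstSimdReg_SimdRegFirst + 3].offXmm;
 *     // per the CPUMCTX_OFF_AND_SIZE macro this equals RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[3])
 */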
5799
5800
5801/**
5802 * Frees a temporary SIMD register.
5803 *
5804 * Any shadow copies of guest registers assigned to the host register will not
5805 * be flushed by this operation.
5806 */
5807DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5808{
5809 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5810 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5811 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5812 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5813 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5814}
5815
5816
5817/**
5818 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5819 *
5820 * @returns New code buffer offset.
5821 * @param pReNative The native recompile state.
5822 * @param off Current code buffer position.
5823 * @param enmGstSimdReg The guest SIMD register to flush.
5824 */
5825DECL_HIDDEN_THROW(uint32_t)
5826iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5827{
5828 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5829
5830 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5831 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5832 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5833 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5834
5835 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5836 {
5837 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5838 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5839 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5840 }
5841
5842 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5843 {
5844 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5845 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5846 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5847 }
5848
5849 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5850 return off;
5851}
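/* Usage sketch (illustrative): force the shadow of a single guest SIMD register back
 * into CPUMCTX before emitting code that reads the context memory directly; this
 * assumes ymm2 is currently shadowed by a host register:
 *
 *     off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(2));
 */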
5852
5853
5854/**
5855 * Flush the given set of guest SIMD registers if marked as dirty.
5856 *
5857 * @returns New code buffer offset.
5858 * @param pReNative The native recompile state.
5859 * @param off Current code buffer position.
5860 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
5861 */
5862DECL_HIDDEN_THROW(uint32_t)
5863iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5864{
5865 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5866 & fFlushGstSimdReg;
5867 if (bmGstSimdRegShadowDirty)
5868 {
5869# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5870 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5871 iemNaitveDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5872# endif
5873
5874 uint32_t idxGstSimdReg = 0;
5875 do
5876 {
5877 if (bmGstSimdRegShadowDirty & 0x1)
5878 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5879
5880 idxGstSimdReg++;
5881 bmGstSimdRegShadowDirty >>= 1;
5882 } while (bmGstSimdRegShadowDirty);
5883 }
5884
5885 return off;
5886}
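/* Usage sketch (illustrative): flush only ymm0 if it is dirty, leaving any other dirty
 * SIMD shadows in place; omitting the mask (UINT64_MAX default) flushes everything:
 *
 *     off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0)));
 */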
5887
5888
5889/**
5890 * Locate a register, possibly freeing one up.
5891 *
5892 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5893 * failed.
5894 *
5895 * @returns Host register number on success. Returns UINT8_MAX if no registers
5896 * found, the caller is supposed to deal with this and raise a
5897 * allocation type specific status code (if desired).
5898 *
5899 * @throws VBox status code if we're run into trouble spilling a variable of
5900 * recording debug info. Does NOT throw anything if we're out of
5901 * registers, though.
5902 */
5903static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5904 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5905{
5906 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5907 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5908 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5909
5910 /*
5911 * Try a freed register that's shadowing a guest register.
5912 */
5913 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5914 if (fRegs)
5915 {
5916 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5917
5918#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5919 /*
5920 * When we have liveness information, we use it to kick out all shadowed
5921 * guest registers that will not be needed any more in this TB. If we're
5922 * lucky, this may prevent us from ending up here again.
5923 *
5924 * Note! We must consider the previous entry here so we don't free
5925 * anything that the current threaded function requires (current
5926 * entry is produced by the next threaded function).
5927 */
5928 uint32_t const idxCurCall = pReNative->idxCurCall;
5929 if (idxCurCall > 0)
5930 {
5931 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5932
5933# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5934 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5935 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5936 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5937#else
5938 /* Construct a mask of the registers not in the read or write state.
5939 Note! We could skip writes, if they aren't from us, as this is just
5940 a hack to prevent trashing registers that have just been written
5941 or will be written when we retire the current instruction. */
5942 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5943 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5944 & IEMLIVENESSBIT_MASK;
5945#endif
5946 /* If it matches any shadowed registers. */
5947 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5948 {
5949 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5950 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5951 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5952
5953 /* See if we've got any unshadowed registers we can return now. */
5954 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5955 if (fUnshadowedRegs)
5956 {
5957 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5958 return (fPreferVolatile
5959 ? ASMBitFirstSetU32(fUnshadowedRegs)
5960 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5961 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5962 - 1;
5963 }
5964 }
5965 }
5966#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5967
5968 unsigned const idxReg = (fPreferVolatile
5969 ? ASMBitFirstSetU32(fRegs)
5970 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5971 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5972 - 1;
5973
5974 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5975 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5976 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5977 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5978
5979 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5980 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5981 uint32_t idxGstSimdReg = 0;
5982 do
5983 {
5984 if (fGstRegShadows & 0x1)
5985 {
5986 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5987 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5988 }
5989 idxGstSimdReg++;
5990 fGstRegShadows >>= 1;
5991 } while (fGstRegShadows);
5992
5993 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5994 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5995 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5996 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5997 return idxReg;
5998 }
5999
6000 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
6001
6002 /*
6003 * Try to free up a variable that's in a register.
6004 *
6005 * We do two rounds here, first evacuating variables that don't need to be
6006 * saved on the stack, then in the second round moving things to the stack.
6007 */
6008 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
6009 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
6010 {
6011 uint32_t fVars = pReNative->Core.bmVars;
6012 while (fVars)
6013 {
6014 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
6015 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
6016 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
6017 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit before continuing so we don't spin forever. */
6018
6019 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
6020 && (RT_BIT_32(idxReg) & fRegMask)
6021 && ( iLoop == 0
6022 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
6023 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
6024 && !pReNative->Core.aVars[idxVar].fRegAcquired)
6025 {
6026 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
6027 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
6028 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
6029 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
6030 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
6031 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
6032
6033 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
6034 {
6035 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
6036 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
6037 }
6038
6039 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6040 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
6041
6042 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
6043 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
6044 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
6045 return idxReg;
6046 }
6047 fVars &= ~RT_BIT_32(idxVar);
6048 }
6049 }
6050
6051 AssertFailed();
6052 return UINT8_MAX;
6053}
6054
6055
6056/**
6057 * Flushes a set of guest register shadow copies.
6058 *
6059 * This is usually done after calling a threaded function or a C-implementation
6060 * of an instruction.
6061 *
6062 * @param pReNative The native recompile state.
6063 * @param fGstSimdRegs Set of guest SIMD registers to flush.
6064 */
6065DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
6066{
6067 /*
6068 * Reduce the mask by what's currently shadowed
6069 */
6070 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
6071 fGstSimdRegs &= bmGstSimdRegShadows;
6072 if (fGstSimdRegs)
6073 {
6074 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
6075 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
6076 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
6077 if (bmGstSimdRegShadowsNew)
6078 {
6079 /*
6080 * Partial.
6081 */
6082 do
6083 {
6084 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
6085 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
6086 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
6087 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
6088 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
6089 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
6090
6091 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
6092 fGstSimdRegs &= ~fInThisHstReg;
6093 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
6094 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
6095 if (!fGstRegShadowsNew)
6096 {
6097 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6098 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6099 }
6100 } while (fGstSimdRegs != 0);
6101 }
6102 else
6103 {
6104 /*
6105 * Clear all.
6106 */
6107 do
6108 {
6109 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
6110 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
6111 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
6112 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
6113 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
6114 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
6115
6116 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
6117 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
6118 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6119 } while (fGstSimdRegs != 0);
6120 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
6121 }
6122 }
6123}
6124
6125
6126/**
6127 * Allocates a temporary host SIMD register.
6128 *
6129 * This may emit code to save register content onto the stack in order to free
6130 * up a register.
6131 *
6132 * @returns The host register number; throws VBox status code on failure,
6133 * so no need to check the return value.
6134 * @param pReNative The native recompile state.
6135 * @param poff Pointer to the variable with the code buffer position.
6136 * This will be update if we need to move a variable from
6137 * register to stack in order to satisfy the request.
6138 * @param fPreferVolatile Whether to prefer volatile over non-volatile
6139 * registers (@c true, default) or the other way around
6140 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
6141 */
6142DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
6143{
6144 /*
6145 * Try find a completely unused register, preferably a call-volatile one.
6146 */
6147 uint8_t idxSimdReg;
6148 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
6149 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6150 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
6151 if (fRegs)
6152 {
6153 if (fPreferVolatile)
6154 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6155 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6156 else
6157 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6158 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6159 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
6160 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
6161
6162 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6163 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6164 }
6165 else
6166 {
6167 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
6168 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
6169 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6170 }
6171
6172 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
6173 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
6174}
6175
6176
6177/**
6178 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
6179 * registers.
6180 *
6181 * @returns The host register number; throws VBox status code on failure,
6182 * so no need to check the return value.
6183 * @param pReNative The native recompile state.
6184 * @param poff Pointer to the variable with the code buffer position.
6185 * This will be updated if we need to move a variable from
6186 * register to stack in order to satisfy the request.
6187 * @param fRegMask Mask of acceptable registers.
6188 * @param fPreferVolatile Whether to prefer volatile over non-volatile
6189 * registers (@c true, default) or the other way around
6190 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
6191 */
6192DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
6193 bool fPreferVolatile /*= true*/)
6194{
6195 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
6196 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
6197
6198 /*
6199 * Try find a completely unused register, preferably a call-volatile one.
6200 */
6201 uint8_t idxSimdReg;
6202 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
6203 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6204 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
6205 & fRegMask;
6206 if (fRegs)
6207 {
6208 if (fPreferVolatile)
6209 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6210 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6211 else
6212 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6213 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6214 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
6215 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
6216
6217 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6218 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6219 }
6220 else
6221 {
6222 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
6223 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
6224 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6225 }
6226
6227 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
6228 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
6229}
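/* Usage sketch (illustrative): allocate a temporary SIMD register that survives a
 * helper call by excluding the call-volatile set from the acceptable mask:
 *
 *     uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmpEx(pReNative, &off,
 *                                                           IEMNATIVE_HST_SIMD_REG_MASK
 *                                                           & ~IEMNATIVE_SIMD_REG_FIXED_MASK
 *                                                           & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK,
 *                                                           false);
 *     // ... use idxSimdTmp ...
 *     iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 */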
6230
6231
6232/**
6233 * Sets the indicator for which part of the given SIMD register has valid data loaded.
6234 *
6235 * @param pReNative The native recompile state.
6236 * @param idxHstSimdReg The host SIMD register to update the state for.
6237 * @param enmLoadSz The load size to set.
6238 */
6239DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
6240 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6241{
6242 /* Everything valid already? -> nothing to do. */
6243 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
6244 return;
6245
6246 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
6247 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
6248 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
6249 {
6250 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
6251 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6252 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
6253 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
6254 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
6255 }
6256}
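/* Illustrative: loading the two halves separately promotes the loaded state to the
 * full 256 bits (sketch):
 *
 *     iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
 *     iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
 *     // enmLoaded is now kIemNativeGstSimdRegLdStSz_256
 */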
6257
6258
6259static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
6260 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
6261{
6262 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
6263 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
6264 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
6265 {
6266# ifdef RT_ARCH_ARM64
6267 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
6268 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
6269# endif
6270
6271 if (idxHstSimdRegDst != idxHstSimdRegSrc)
6272 {
6273 switch (enmLoadSzDst)
6274 {
6275 case kIemNativeGstSimdRegLdStSz_256:
6276 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6277 break;
6278 case kIemNativeGstSimdRegLdStSz_Low128:
6279 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6280 break;
6281 case kIemNativeGstSimdRegLdStSz_High128:
6282 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6283 break;
6284 default:
6285 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6286 }
6287
6288 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
6289 }
6290 }
6291 else
6292 {
6293 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
6294 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
6295 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
6296 }
6297
6298 return off;
6299}
6300
6301
6302/**
6303 * Allocates a temporary host SIMD register for keeping a guest
6304 * SIMD register value.
6305 *
6306 * Since we may already have a register holding the guest register value,
6307 * code will be emitted to do the loading if that's not the case. Code may also
6308 * be emitted if we have to free up a register to satisfy the request.
6309 *
6310 * @returns The host register number; throws VBox status code on failure, so no
6311 * need to check the return value.
6312 * @param pReNative The native recompile state.
6313 * @param poff Pointer to the variable with the code buffer
6314 * position. This will be updated if we need to move a
6315 * variable from register to stack in order to satisfy
6316 * the request.
6317 * @param enmGstSimdReg The guest SIMD register that is to be updated.
6318 * @param enmIntendedUse How the caller will be using the host register.
6319 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
6320 * register is okay (default). The ASSUMPTION here is
6321 * that the caller has already flushed all volatile
6322 * registers, so this is only applied if we allocate a
6323 * new register.
6324 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
6325 */
6326DECL_HIDDEN_THROW(uint8_t)
6327iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6328 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
6329 bool fNoVolatileRegs /*= false*/)
6330{
6331 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
6332#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
6333 AssertMsg( pReNative->idxCurCall == 0
6334 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6335 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
6336 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
6337 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
6338 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
6339 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
6340#endif
6341#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
6342 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
6343#endif
6344 uint32_t const fRegMask = !fNoVolatileRegs
6345 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
6346 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
6347
6348 /*
6349 * First check if the guest register value is already in a host register.
6350 */
6351 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
6352 {
6353 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
6354 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
6355 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
6356 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
6357
6358 /* It's not supposed to be allocated... */
6359 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
6360 {
6361 /*
6362 * If the register will trash the guest shadow copy, try to find a
6363 * completely unused register we can use instead. If that fails,
6364 * we need to disassociate the host reg from the guest reg.
6365 */
6366 /** @todo would be nice to know if preserving the register is in any way helpful. */
6367 /* If the purpose is calculations, try duplicate the register value as
6368 we'll be clobbering the shadow. */
6369 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
6370 && ( ~pReNative->Core.bmHstSimdRegs
6371 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6372 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
6373 {
6374 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
6375
6376 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6377
6378 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
6379 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6380 g_apszIemNativeHstSimdRegNames[idxRegNew]));
6381 idxSimdReg = idxRegNew;
6382 }
6383 /* If the current register matches the restrictions, go ahead and allocate
6384 it for the caller. */
6385 else if (fRegMask & RT_BIT_32(idxSimdReg))
6386 {
6387 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
6388 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
6389 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6390 {
6391 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6392 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
6393 else
6394 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
6395 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
6396 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6397 }
6398 else
6399 {
6400 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
6401 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
6402 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
6403 }
6404 }
6405 /* Otherwise, allocate a register that satisfies the caller and transfer
6406 the shadowing if compatible with the intended use. (This basically
6407 means the caller wants a non-volatile register (RSP push/pop scenario).) */
6408 else
6409 {
6410 Assert(fNoVolatileRegs);
6411 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
6412 !fNoVolatileRegs
6413 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
6414 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6415 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6416 {
6417 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
6418 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n",
6419 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
6420 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6421 }
6422 else
6423 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
6424 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6425 g_apszIemNativeHstSimdRegNames[idxRegNew]));
6426 idxSimdReg = idxRegNew;
6427 }
6428 }
6429 else
6430 {
6431 /*
6432 * Oops. Shadowed guest register already allocated!
6433 *
6434 * Allocate a new register, copy the value and, if updating, the
6435 * guest shadow copy assignment to the new register.
6436 */
6437 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
6438 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
6439 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
6440 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
6441
6442 /** @todo share register for readonly access. */
6443 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
6444 enmIntendedUse == kIemNativeGstRegUse_Calculation);
6445
6446 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6447 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6448 else
6449 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6450
6451 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
6452 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6453 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
6454 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6455 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
6456 else
6457 {
6458 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
6459 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
6460 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6461 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
6462 }
6463 idxSimdReg = idxRegNew;
6464 }
6465 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
6466
6467#ifdef VBOX_STRICT
6468 /* Strict builds: Check that the value is correct. */
6469 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6470 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
6471#endif
6472
6473 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6474 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
6475 {
6476# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6477 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
6478 iemNaitveDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
6479# endif
6480
6481 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
6482 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6483 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6484 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6485 else
6486 {
6487 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
6488 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6489 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6490 }
6491 }
6492
6493 return idxSimdReg;
6494 }
6495
6496 /*
6497 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
6498 */
6499 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
6500
6501 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6502 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
6503 else
6504 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6505
6506 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6507 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
6508
6509 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6510 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
6511 {
6512# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6513 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
6514 iemNaitveDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
6515# endif
6516
6517 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
6518 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6519 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6520 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6521 else
6522 {
6523 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
6524 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6525 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6526 }
6527 }
6528
6529 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
6530 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6531
6532 return idxRegNew;
6533}
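/* Usage sketch (illustrative emitter pattern): since the allocator above marks the
 * shadow dirty according to the load size for ForUpdate/ForFullWrite use, an emitter
 * only needs to allocate, emit its operation and free the register again:
 *
 *     uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                        IEMNATIVEGSTSIMDREG_SIMD(1),
 *                                                                        kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                        kIemNativeGstRegUse_ForUpdate);
 *     // ... emit the actual SIMD instruction(s) operating on idxSimdReg ...
 *     iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 */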
6534
6535
6536/**
6537 * Flushes guest SIMD register shadow copies held by a set of host registers.
6538 *
6539 * This is used whenever calling an external helper to ensure that we don't carry on
6540 * with any guest shadows in volatile registers, as these will get corrupted by the helper.
6541 *
6542 * @param pReNative The native recompile state.
6543 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
6544 */
6545DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
6546{
6547 /*
6548 * Reduce the mask by what's currently shadowed.
6549 */
6550 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
6551 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
6552 if (fHstSimdRegs)
6553 {
6554 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
6555 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
6556 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
6557 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
6558 if (bmHstSimdRegsWithGstShadowNew)
6559 {
6560 /*
6561 * Partial (likely).
6562 */
6563 uint64_t fGstShadows = 0;
6564 do
6565 {
6566 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
6567 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
6568 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
6569 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
6570 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
6571 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
6572
6573 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
6574 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
6575 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
6576 } while (fHstSimdRegs != 0);
6577 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
6578 }
6579 else
6580 {
6581 /*
6582 * Clear all.
6583 */
6584 do
6585 {
6586 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
6587 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
6588 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
6589 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
6590 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
6591 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
6592
6593 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
6594 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
6595 } while (fHstSimdRegs != 0);
6596 pReNative->Core.bmGstSimdRegShadows = 0;
6597 }
6598 }
6599}
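/* Usage sketch (illustrative): before emitting a call to an external helper, drop the
 * guest shadows held in call-volatile SIMD registers so stale associations aren't
 * trusted after the helper has clobbered them:
 *
 *     iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */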
6600#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6601
6602
6603
6604/*********************************************************************************************************************************
6605* Code emitters for flushing pending guest register writes and sanity checks *
6606*********************************************************************************************************************************/
6607
6608#ifdef VBOX_STRICT
6609/**
6610 * Does internal register allocator sanity checks.
6611 */
6612DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6613{
6614 /*
6615 * Iterate host registers building a guest shadowing set.
6616 */
6617 uint64_t bmGstRegShadows = 0;
6618 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6619 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6620 while (bmHstRegsWithGstShadow)
6621 {
6622 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6623 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6624 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6625
6626 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6627 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6628 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6629 bmGstRegShadows |= fThisGstRegShadows;
6630 while (fThisGstRegShadows)
6631 {
6632 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6633 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6634 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6635 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6636 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6637 }
6638 }
6639 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6640 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6641 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6642
6643 /*
6644 * Now the other way around, checking the guest to host index array.
6645 */
6646 bmHstRegsWithGstShadow = 0;
6647 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6648 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6649 while (bmGstRegShadows)
6650 {
6651 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6652 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6653 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6654
6655 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6656 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6657 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6658 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6659 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6660 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6661 }
6662 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6663 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6664 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6665}
6666#endif /* VBOX_STRICT */
6667
6668
6669/**
6670 * Flushes any delayed guest register writes.
6671 *
6672 * This must be called prior to calling CImpl functions and any helpers that use
6673 * the guest state (like raising exceptions) and such.
6674 *
6675 * This optimization has not yet been implemented. The first target would be
6676 * RIP updates, since these are the most common ones.
6677 *
6678 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
6679 * the caller if it wishes to do so.
6680 */
6681DECL_HIDDEN_THROW(uint32_t)
6682iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
6683{
6684#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6685 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
6686 off = iemNativeEmitPcWriteback(pReNative, off);
6687#else
6688 RT_NOREF(pReNative, fGstShwExcept);
6689#endif
6690
6691#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6692 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
6693#endif
6694
6695#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6696 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
6697#endif
6698
6699 return off;
6700}
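/* Usage sketch (illustrative): a direct call to the slow path, flushing all delayed
 * GPR and SIMD writes with no exceptions before a C helper inspects the guest state:
 *
 *     off = iemNativeRegFlushPendingWritesSlow(pReNative, off, 0, 0);
 */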
6701
6702
6703#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6704/**
6705 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
6706 */
6707DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6708{
6709 Assert(pReNative->Core.offPc);
6710# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6711 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6712 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
6713# endif
6714
6715# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6716 /* Allocate a temporary PC register. */
6717 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6718
6719 /* Perform the addition and store the result. */
6720 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6721 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6722
6723 /* Free but don't flush the PC register. */
6724 iemNativeRegFreeTmp(pReNative, idxPcReg);
6725# else
6726 /* Compare the shadow with the context value, they should match. */
6727 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6728 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6729# endif
6730
6731 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
6732 pReNative->Core.offPc = 0;
6733 pReNative->Core.cInstrPcUpdateSkipped = 0;
6734
6735 return off;
6736}
6737#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6738
6739
6740/*********************************************************************************************************************************
6741* Code Emitters (larger snippets) *
6742*********************************************************************************************************************************/
6743
6744/**
6745 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6746 * extending to 64-bit width.
6747 *
6748 * @returns New code buffer offset on success, UINT32_MAX on failure.
6749 * @param pReNative The native recompile state.
6750 * @param off The current code buffer position.
6751 * @param idxHstReg The host register to load the guest register value into.
6752 * @param enmGstReg The guest register to load.
6753 *
6754 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6755 * that is something the caller needs to do if applicable.
6756 */
6757DECL_HIDDEN_THROW(uint32_t)
6758iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6759{
6760 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6761 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6762
6763 switch (g_aGstShadowInfo[enmGstReg].cb)
6764 {
6765 case sizeof(uint64_t):
6766 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6767 case sizeof(uint32_t):
6768 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6769 case sizeof(uint16_t):
6770 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6771#if 0 /* not present in the table. */
6772 case sizeof(uint8_t):
6773 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6774#endif
6775 default:
6776 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6777 }
6778}
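/* Usage sketch (illustrative): reload the guest RIP shadow into an already allocated
 * host register; idxHstReg here stands for any suitable temporary GPR the caller owns:
 *
 *     off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, kIemNativeGstReg_Pc);
 */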
6779
6780
6781#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6782/**
6783 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6784 *
6785 * @returns New code buffer offset on success, UINT32_MAX on failure.
6786 * @param pReNative The recompiler state.
6787 * @param off The current code buffer position.
6788 * @param idxHstSimdReg The host register to load the guest register value into.
6789 * @param enmGstSimdReg The guest register to load.
6790 * @param enmLoadSz The load size of the register.
6791 *
6792 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6793 * that is something the caller needs to do if applicable.
6794 */
6795DECL_HIDDEN_THROW(uint32_t)
6796iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6797 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6798{
6799 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6800
6801 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6802 switch (enmLoadSz)
6803 {
6804 case kIemNativeGstSimdRegLdStSz_256:
6805 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6806 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6807 case kIemNativeGstSimdRegLdStSz_Low128:
6808 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6809 case kIemNativeGstSimdRegLdStSz_High128:
6810 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6811 default:
6812 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6813 }
6814}
6815#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6816
6817#ifdef VBOX_STRICT
6818
6819/**
6820 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6821 *
6822 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6823 * Trashes EFLAGS on AMD64.
6824 */
6825DECL_HIDDEN_THROW(uint32_t)
6826iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6827{
6828# ifdef RT_ARCH_AMD64
6829 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6830
6831 /* rol reg64, 32 */
6832 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6833 pbCodeBuf[off++] = 0xc1;
6834 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6835 pbCodeBuf[off++] = 32;
6836
6837 /* test reg32, ffffffffh */
6838 if (idxReg >= 8)
6839 pbCodeBuf[off++] = X86_OP_REX_B;
6840 pbCodeBuf[off++] = 0xf7;
6841 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6842 pbCodeBuf[off++] = 0xff;
6843 pbCodeBuf[off++] = 0xff;
6844 pbCodeBuf[off++] = 0xff;
6845 pbCodeBuf[off++] = 0xff;
6846
6847 /* je/jz +1 */
6848 pbCodeBuf[off++] = 0x74;
6849 pbCodeBuf[off++] = 0x01;
6850
6851 /* int3 */
6852 pbCodeBuf[off++] = 0xcc;
6853
6854 /* rol reg64, 32 */
6855 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6856 pbCodeBuf[off++] = 0xc1;
6857 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6858 pbCodeBuf[off++] = 32;
6859
6860# elif defined(RT_ARCH_ARM64)
6861 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6862 /* lsr tmp0, reg64, #32 */
6863 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6864 /* cbz tmp0, +1 */
6865 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6866 /* brk #0x1100 */
6867 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6868
6869# else
6870# error "Port me!"
6871# endif
6872 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6873 return off;
6874}
6875
6876
6877/**
6878 * Emitting code that checks that the content of register @a idxReg is the same
6879 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6880 * instruction if that's not the case.
6881 *
6882 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6883 * Trashes EFLAGS on AMD64.
6884 */
6885DECL_HIDDEN_THROW(uint32_t)
6886iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6887{
6888#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6889 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6890 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6891 return off;
6892#endif
6893
6894# ifdef RT_ARCH_AMD64
6895 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6896
6897 /* cmp reg, [mem] */
6898 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6899 {
6900 if (idxReg >= 8)
6901 pbCodeBuf[off++] = X86_OP_REX_R;
6902 pbCodeBuf[off++] = 0x38;
6903 }
6904 else
6905 {
6906 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6907 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6908 else
6909 {
6910 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6911 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6912 else
6913 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6914 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6915 if (idxReg >= 8)
6916 pbCodeBuf[off++] = X86_OP_REX_R;
6917 }
6918 pbCodeBuf[off++] = 0x39;
6919 }
6920 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6921
6922 /* je/jz +1 */
6923 pbCodeBuf[off++] = 0x74;
6924 pbCodeBuf[off++] = 0x01;
6925
6926 /* int3 */
6927 pbCodeBuf[off++] = 0xcc;
6928
6929 /* For values smaller than the register size, we must check that the rest
6930 of the register is all zeros. */
6931 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6932 {
6933 /* test reg64, imm32 */
6934 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6935 pbCodeBuf[off++] = 0xf7;
6936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6937 pbCodeBuf[off++] = 0;
6938 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6939 pbCodeBuf[off++] = 0xff;
6940 pbCodeBuf[off++] = 0xff;
6941
6942 /* je/jz +1 */
6943 pbCodeBuf[off++] = 0x74;
6944 pbCodeBuf[off++] = 0x01;
6945
6946 /* int3 */
6947 pbCodeBuf[off++] = 0xcc;
6948 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6949 }
6950 else
6951 {
6952 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6953 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6954 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6955 }
6956
6957# elif defined(RT_ARCH_ARM64)
6958 /* mov TMP0, [gstreg] */
6959 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6960
6961 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6962 /* sub tmp0, tmp0, idxReg */
6963 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6964 /* cbz tmp0, +1 */
6965 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6966 /* brk #0x1000+enmGstReg */
6967 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6968 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6969
6970# else
6971# error "Port me!"
6972# endif
6973 return off;
6974}
6975
6976
6977# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6978# ifdef RT_ARCH_AMD64
6979/**
6980 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6981 */
6982DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6983{
6984 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6985 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6986 if (idxSimdReg >= 8)
6987 pbCodeBuf[off++] = X86_OP_REX_R;
6988 pbCodeBuf[off++] = 0x0f;
6989 pbCodeBuf[off++] = 0x38;
6990 pbCodeBuf[off++] = 0x29;
6991 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6992
6993 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6994 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6995 pbCodeBuf[off++] = X86_OP_REX_W
6996 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6997 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6998 pbCodeBuf[off++] = 0x0f;
6999 pbCodeBuf[off++] = 0x3a;
7000 pbCodeBuf[off++] = 0x16;
7001 pbCodeBuf[off++] = 0xeb;
7002 pbCodeBuf[off++] = 0x00;
7003
7004 /* cmp tmp0, 0xffffffffffffffff. */
7005 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7006 pbCodeBuf[off++] = 0x83;
7007 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
7008 pbCodeBuf[off++] = 0xff;
7009
7010 /* je/jz +1 */
7011 pbCodeBuf[off++] = 0x74;
7012 pbCodeBuf[off++] = 0x01;
7013
7014 /* int3 */
7015 pbCodeBuf[off++] = 0xcc;
7016
7017 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
7018 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7019 pbCodeBuf[off++] = X86_OP_REX_W
7020 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
7021 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7022 pbCodeBuf[off++] = 0x0f;
7023 pbCodeBuf[off++] = 0x3a;
7024 pbCodeBuf[off++] = 0x16;
7025 pbCodeBuf[off++] = 0xeb;
7026 pbCodeBuf[off++] = 0x01;
7027
7028 /* cmp tmp0, 0xffffffffffffffff. */
7029 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7030 pbCodeBuf[off++] = 0x83;
7031 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
7032 pbCodeBuf[off++] = 0xff;
7033
7034 /* je/jz +1 */
7035 pbCodeBuf[off++] = 0x74;
7036 pbCodeBuf[off++] = 0x01;
7037
7038 /* int3 */
7039 pbCodeBuf[off++] = 0xcc;
7040
7041 return off;
7042}
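/* Note on the encoding above: pcmpeqq sets each 64-bit lane of the temporary to
   all ones when it equals the corresponding lane loaded from CPUMCTX, so both
   qwords read back with pextrq must be 0xffffffffffffffff for a match.  The
   follow-up compare uses the sign-extending imm8 form (0x83 /7 with ib=0xff),
   which is how the single 0xff byte stands in for the full 64-bit -1 value. */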
7043# endif
7044
7045
7046/**
7047 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
7048 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
7049 * instruction if that's not the case.
7050 *
7051 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
7052 * Trashes EFLAGS on AMD64.
7053 */
7054DECL_HIDDEN_THROW(uint32_t)
7055iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
7056 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
7057{
7058 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
7059 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
7060 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
7061 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
7062 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
7063 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
7064 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
7065 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
7066 return off;
7067
7068# ifdef RT_ARCH_AMD64
7069 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7070 {
7071 /* movdqa vectmp0, idxSimdReg */
7072 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
7073
7074 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
7075
7076 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7077 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
7078 }
7079
7080 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7081 {
7082 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
7083 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
7084
7085 /* vextracti128 vectmp0, idxSimdReg, 1 */
7086 pbCodeBuf[off++] = X86_OP_VEX3;
7087 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
7088 | X86_OP_VEX3_BYTE1_X
7089 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
7090 | 0x03; /* Opcode map */
7091 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
7092 pbCodeBuf[off++] = 0x39;
7093 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
7094 pbCodeBuf[off++] = 0x01;
7095
7096 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7097 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
7098 }
7099# elif defined(RT_ARCH_ARM64)
7100 /* mov vectmp0, [gstreg] */
7101 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
7102
7103 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7104 {
7105 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
7106 /* eor vectmp0, vectmp0, idxSimdReg */
7107 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
7108 /* uaddlv vectmp0, vectmp0.16B */
7109 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
7110 /* umov tmp0, vectmp0.H[0] */
7111 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7112 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
7113 /* cbz tmp0, +1 */
7114 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
7115 /* brk #0x1000+enmGstReg */
7116 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
7117 }
7118
7119 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7120 {
7121 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
7122 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
7123 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
7124 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
7125 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
7126 /* umov tmp0, (vectmp0 + 1).H[0] */
7127 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
7128 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
7129 /* cbz tmp0, +1 */
7130 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
7131 /* brk #0x1000+enmGstReg */
7132 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
7133 }
7134
7135# else
7136# error "Port me!"
7137# endif
7138
7139 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7140 return off;
7141}
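/* Note on the ARM64 variant above, as a rough C sketch (not emitted code):
 *      tmp128 = hstval ^ gstval;                   // EOR: zero iff identical
 *      if (sum_of_all_16_bytes(tmp128) != 0)       // UADDLV.16B + UMOV H[0]
 *          brk(0x1000 + enmGstSimdReg);
 * The byte sum of the XOR result fits in 16 bits (16 * 255 = 4080), which is
 * why a halfword lane is moved to the GPR for the cbz test. */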
7142# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
7143
7144
7145/**
7146 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
7147 * important bits.
7148 *
7149 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
7150 * Trashes EFLAGS on AMD64.
7151 */
7152DECL_HIDDEN_THROW(uint32_t)
7153iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
7154{
7155 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
7156 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
7157 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
7158 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
7159
7160#ifdef RT_ARCH_AMD64
7161 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7162
7163 /* je/jz +1 */
7164 pbCodeBuf[off++] = 0x74;
7165 pbCodeBuf[off++] = 0x01;
7166
7167 /* int3 */
7168 pbCodeBuf[off++] = 0xcc;
7169
7170# elif defined(RT_ARCH_ARM64)
7171 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7172
7173 /* b.eq +1 */
7174 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
7175 /* brk #0x2000 */
7176 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
7177
7178# else
7179# error "Port me!"
7180# endif
7181 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7182
7183 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7184 return off;
7185}
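/* Rough C equivalent of the check emitted above (sketch only):
 *      if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
 *          != (fExec & IEMTB_F_KEY_MASK))
 *          breakpoint();                        // int3 / brk #0x2000
 */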
7186
7187#endif /* VBOX_STRICT */
7188
7189
7190#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
7191/**
7192 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
7193 */
7194DECL_HIDDEN_THROW(uint32_t)
7195iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
7196{
7197 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
7198
7199 fEflNeeded &= X86_EFL_STATUS_BITS;
7200 if (fEflNeeded)
7201 {
7202# ifdef RT_ARCH_AMD64
7203 /* test dword [pVCpu + offVCpu], imm32 */
7204 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7205 if (fEflNeeded <= 0xff)
7206 {
7207 pCodeBuf[off++] = 0xf6;
7208 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
7209 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
7210 }
7211 else
7212 {
7213 pCodeBuf[off++] = 0xf7;
7214 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
7215 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
7216 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
7217 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
7218 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
7219 }
7220 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7221
7222# else
7223 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
7224 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
7225 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
7226# ifdef RT_ARCH_ARM64
7227 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
7228 off = iemNativeEmitBrk(pReNative, off, 0x7777);
7229# else
7230# error "Port me!"
7231# endif
7232 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7233# endif
7234 }
7235 return off;
7236}
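/* Note: the AMD64 path above picks the byte sized test form (0xf6 /0, imm8)
   whenever the needed status flags all fit in the low byte, which saves three
   immediate bytes compared to the dword form (0xf7 /0, imm32). */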
7237#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
7238
7239
7240/**
7241 * Emits code for checking the return code of a call and rcPassUp, returning
7242 * from the code if either is non-zero.
7243 */
7244DECL_HIDDEN_THROW(uint32_t)
7245iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7246{
7247#ifdef RT_ARCH_AMD64
7248 /*
7249 * AMD64: eax = call status code.
7250 */
7251
7252 /* edx = rcPassUp */
7253 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
7254# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7255 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
7256# endif
7257
7258 /* edx = eax | rcPassUp */
7259 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7260 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
7261 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
7262 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7263
7264 /* Jump to non-zero status return path. */
7265 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
7266
7267 /* done. */
7268
7269#elif RT_ARCH_ARM64
7270 /*
7271 * ARM64: w0 = call status code.
7272 */
7273# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7274 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
7275# endif
7276 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
7277
7278 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7279
7280 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
7281
7282 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7283 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7284 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
7285
7286#else
7287# error "port me"
7288#endif
7289 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7290 RT_NOREF_PV(idxInstr);
7291 return off;
7292}
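/* In C terms the sequence emitted above roughly amounts to (sketch only):
 *      if ((uint32_t)rcCall | (uint32_t)pVCpu->iem.s.rcPassUp)
 *          goto NonZeroRetOrPassUp;    // resolved by iemNativeEmitRcFiddling()
 */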
7293
7294
7295/**
7296 * Emits code to check if the content of @a idxAddrReg is a canonical address,
7297 * raising a \#GP(0) if it isn't.
7298 *
7299 * @returns New code buffer offset; throws VBox status code on error.
7300 * @param pReNative The native recompile state.
7301 * @param off The code buffer offset.
7302 * @param idxAddrReg The host register with the address to check.
7303 * @param idxInstr The current instruction.
7304 */
7305DECL_HIDDEN_THROW(uint32_t)
7306iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
7307{
7308 /*
7309 * Make sure we don't have any outstanding guest register writes as we may
7310 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
7311 */
7312 off = iemNativeRegFlushPendingWrites(pReNative, off);
7313
7314#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7315 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7316#else
7317 RT_NOREF(idxInstr);
7318#endif
7319
7320#ifdef RT_ARCH_AMD64
7321 /*
7322 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
7323 * return raisexcpt();
7324 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
7325 */
7326 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7327
7328 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
7329 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
7330 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
7331 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
7332 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7333
7334 iemNativeRegFreeTmp(pReNative, iTmpReg);
7335
7336#elif defined(RT_ARCH_ARM64)
7337 /*
7338 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
7339 * return raisexcpt();
7340 * ----
7341 * mov x1, 0x800000000000
7342 * add x1, x0, x1
7343 * cmp xzr, x1, lsr 48
7344 * b.ne .Lraisexcpt
7345 */
7346 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7347
7348 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
7349 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
7350 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
7351 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7352
7353 iemNativeRegFreeTmp(pReNative, iTmpReg);
7354
7355#else
7356# error "Port me"
7357#endif
7358 return off;
7359}
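/* Worked example for the canonical check above (illustrative only); an address
 * is canonical when bits 63:47 are all equal:
 *      0x00007fffffffffff: hi32=0x00007fff, +0x8000 = 0x0000ffff, >>16 = 0 -> ok
 *      0x0000800000000000: hi32=0x00008000, +0x8000 = 0x00010000, >>16 = 1 -> #GP(0)
 *      0xffff800000000000: hi32=0xffff8000, +0x8000 wraps to 0 (32-bit add), >>16 = 0 -> ok
 * The ARM64 variant adds 0x0000800000000000 to the full value and requires
 * bits 63:48 of the (wrapping) sum to be zero, which is the same test. */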
7360
7361
7362/**
7363 * Emits code to check that the content of @a idxAddrReg is within the limit
7364 * of CS, raising a \#GP(0) if it isn't.
7365 *
7366 * @returns New code buffer offset; throws VBox status code on error.
7367 * @param pReNative The native recompile state.
7368 * @param off The code buffer offset.
7369 * @param idxAddrReg The host register (32-bit) with the address to
7370 * check.
7371 * @param idxInstr The current instruction.
7372 */
7373DECL_HIDDEN_THROW(uint32_t)
7374iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7375 uint8_t idxAddrReg, uint8_t idxInstr)
7376{
7377 /*
7378 * Make sure we don't have any outstanding guest register writes as we may
7379 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
7380 */
7381 off = iemNativeRegFlushPendingWrites(pReNative, off);
7382
7383#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7384 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7385#else
7386 RT_NOREF(idxInstr);
7387#endif
7388
7389 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
7390 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
7391 kIemNativeGstRegUse_ReadOnly);
7392
7393 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
7394 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7395
7396 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
7397 return off;
7398}
7399
7400
7401/**
7402 * Emits a call to a CImpl function or something similar.
7403 */
7404DECL_HIDDEN_THROW(uint32_t)
7405iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
7406 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
7407{
7408 /* Writeback everything. */
7409 off = iemNativeRegFlushPendingWrites(pReNative, off);
7410
7411 /*
7412 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
7413 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
7414 */
7415 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
7416 fGstShwFlush
7417 | RT_BIT_64(kIemNativeGstReg_Pc)
7418 | RT_BIT_64(kIemNativeGstReg_EFlags));
7419 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7420
7421 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
7422
7423 /*
7424 * Load the parameters.
7425 */
7426#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
7427 /* Special handling for the hidden VBOXSTRICTRC return pointer. */
7428 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7429 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
7430 if (cAddParams > 0)
7431 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
7432 if (cAddParams > 1)
7433 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
7434 if (cAddParams > 2)
7435 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
7436 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7437
7438#else
7439 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7440 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7441 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
7442 if (cAddParams > 0)
7443 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
7444 if (cAddParams > 1)
7445 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
7446 if (cAddParams > 2)
7447# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
7448 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
7449# else
7450 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
7451# endif
7452#endif
7453
7454 /*
7455 * Make the call.
7456 */
7457 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
7458
7459#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
7460 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7461#endif
7462
7463 /*
7464 * Check the status code.
7465 */
7466 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7467}
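/* Rough shape of the call set up above (sketch; actual pfnCImpl signatures
 * depend on cAddParams):
 *      rcStrict = pfnCImpl(pVCpu, cbInstr[, uParam0[, uParam1[, uParam2]]]);
 * With VBOXSTRICTRC_STRICT_ENABLED on Windows the VBOXSTRICTRC result is
 * returned through a hidden first parameter pointing at the shadow arg area,
 * which is why the status code is loaded back off the stack after the call. */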
7468
7469
7470/**
7471 * Emits a call to a threaded worker function.
7472 */
7473DECL_HIDDEN_THROW(uint32_t)
7474iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
7475{
7476 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7477
7478 /* We don't know what the threaded function is doing so we must flush all pending writes. */
7479 off = iemNativeRegFlushPendingWrites(pReNative, off);
7480
7481 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
7482 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
7483
7484#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7485 /* The threaded function may throw / long jmp, so set the current instruction
7486 number if we're counting. */
7487 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7488#endif
7489
7490 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
7491
7492#ifdef RT_ARCH_AMD64
7493 /* Load the parameters and emit the call. */
7494# ifdef RT_OS_WINDOWS
7495# ifndef VBOXSTRICTRC_STRICT_ENABLED
7496 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7497 if (cParams > 0)
7498 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
7499 if (cParams > 1)
7500 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
7501 if (cParams > 2)
7502 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
7503# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
7504 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
7505 if (cParams > 0)
7506 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
7507 if (cParams > 1)
7508 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
7509 if (cParams > 2)
7510 {
7511 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
7512 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
7513 }
7514 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7515# endif /* VBOXSTRICTRC_STRICT_ENABLED */
7516# else
7517 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7518 if (cParams > 0)
7519 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
7520 if (cParams > 1)
7521 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
7522 if (cParams > 2)
7523 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
7524# endif
7525
7526 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
7527
7528# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
7529 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7530# endif
7531
7532#elif RT_ARCH_ARM64
7533 /*
7534 * ARM64:
7535 */
7536 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7537 if (cParams > 0)
7538 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
7539 if (cParams > 1)
7540 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
7541 if (cParams > 2)
7542 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
7543
7544 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
7545
7546#else
7547# error "port me"
7548#endif
7549
7550 /*
7551 * Check the status code.
7552 */
7553 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
7554
7555 return off;
7556}
7557
7558#ifdef VBOX_WITH_STATISTICS
7559/**
7560 * Emits code to update the threaded call statistics.
7561 */
7562DECL_INLINE_THROW(uint32_t)
7563iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
7564{
7565 /*
7566 * Update threaded function stats.
7567 */
7568 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
7569 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
7570# if defined(RT_ARCH_ARM64)
7571 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
7572 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
7573 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
7574 iemNativeRegFreeTmp(pReNative, idxTmp1);
7575 iemNativeRegFreeTmp(pReNative, idxTmp2);
7576# else
7577 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
7578# endif
7579 return off;
7580}
7581#endif /* VBOX_WITH_STATISTICS */
7582
7583
7584/**
7585 * Emits the code at the ReturnWithFlags label (returns
7586 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7587 */
7588static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7589{
7590 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7591 if (idxLabel != UINT32_MAX)
7592 {
7593 iemNativeLabelDefine(pReNative, idxLabel, off);
7594
7595 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7596
7597 /* jump back to the return sequence. */
7598 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7599 }
7600 return off;
7601}
7602
7603
7604/**
7605 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7606 */
7607static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7608{
7609 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7610 if (idxLabel != UINT32_MAX)
7611 {
7612 iemNativeLabelDefine(pReNative, idxLabel, off);
7613
7614 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7615
7616 /* jump back to the return sequence. */
7617 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7618 }
7619 return off;
7620}
7621
7622
7623/**
7624 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7625 */
7626static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7627{
7628 /*
7629 * Generate the rc + rcPassUp fiddling code if needed.
7630 */
7631 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7632 if (idxLabel != UINT32_MAX)
7633 {
7634 iemNativeLabelDefine(pReNative, idxLabel, off);
7635
7636 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
7637#ifdef RT_ARCH_AMD64
7638# ifdef RT_OS_WINDOWS
7639# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7640 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7641# endif
7642 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7643 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7644# else
7645 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7646 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7647# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7648 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7649# endif
7650# endif
7651# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7652 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7653# endif
7654
7655#else
7656 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7657 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7658 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7659#endif
7660
7661 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7662 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7663 }
7664 return off;
7665}
7666
7667
7668/**
7669 * Emits a standard epilog.
7670 */
7671static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7672{
7673 *pidxReturnLabel = UINT32_MAX;
7674
7675 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7676 off = iemNativeRegFlushPendingWrites(pReNative, off);
7677
7678 /*
7679 * Successful return, so clear the return register (eax, w0).
7680 */
7681 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
7682
7683 /*
7684 * Define label for common return point.
7685 */
7686 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7687 *pidxReturnLabel = idxReturn;
7688
7689 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7690
7691 /*
7692 * Restore registers and return.
7693 */
7694#ifdef RT_ARCH_AMD64
7695 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7696
7697 /* Reposition rsp at the r15 restore point. */
7698 pbCodeBuf[off++] = X86_OP_REX_W;
7699 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7700 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7701 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7702
7703 /* Pop non-volatile registers and return */
7704 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7705 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7706 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7707 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7708 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7709 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7710 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7711 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7712# ifdef RT_OS_WINDOWS
7713 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7714 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7715# endif
7716 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7717 pbCodeBuf[off++] = 0xc9; /* leave */
7718 pbCodeBuf[off++] = 0xc3; /* ret */
7719 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7720
7721#elif RT_ARCH_ARM64
7722 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7723
7724 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7725 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7726 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7727 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7728 IEMNATIVE_FRAME_VAR_SIZE / 8);
7729 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7730 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7731 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7732 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7733 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7734 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7735 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7736 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7737 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7738 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7739 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7740 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7741
7742 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7743 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7744 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7745 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7746
7747 /* retab / ret */
7748# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7749 if (1)
7750 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7751 else
7752# endif
7753 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7754
7755#else
7756# error "port me"
7757#endif
7758 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7759
7760 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7761}
7762
7763
7764/**
7765 * Emits a standard prolog.
7766 */
7767static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7768{
7769#ifdef RT_ARCH_AMD64
7770 /*
7771 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7772 * reserving 64 bytes for stack variables plus 4 non-register argument
7773 * slots. Fixed register assignment: xBX = pVCpu;
7774 *
7775 * Since we always do the same register spilling, we can use the same
7776 * unwind description for all the code.
7777 */
7778 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7779 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7780 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7781 pbCodeBuf[off++] = 0x8b;
7782 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7783 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7784 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7785# ifdef RT_OS_WINDOWS
7786 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7787 pbCodeBuf[off++] = 0x8b;
7788 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7789 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7790 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7791# else
7792 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7793 pbCodeBuf[off++] = 0x8b;
7794 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7795# endif
7796 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7797 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7798 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7799 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7800 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7801 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7802 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7803 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7804
7805# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7806 /* Save the frame pointer. */
7807 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7808# endif
7809
7810 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7811 X86_GREG_xSP,
7812 IEMNATIVE_FRAME_ALIGN_SIZE
7813 + IEMNATIVE_FRAME_VAR_SIZE
7814 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7815 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7816 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7817 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7818 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7819
7820#elif RT_ARCH_ARM64
7821 /*
7822 * We set up a stack frame exactly like on x86, only we have to push the
7823 * return address ourselves here. We save all non-volatile registers.
7824 */
7825 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7826
7827 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further as we've been unable
7828 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7829 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
7830 * in any way conditional, so we just emit this instruction now and hope for the best... */
7831 /* pacibsp */
7832 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7833# endif
7834
7835 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7836 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7837 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7838 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7839 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7840 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7841 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7842 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7843 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7844 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7845 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7846 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7847 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7848 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7849 /* Save the BP and LR (ret address) registers at the top of the frame. */
7850 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7851 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7852 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7853 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7854 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7855 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7856
7857 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7858 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7859
7860 /* mov r28, r0 */
7861 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7862 /* mov r27, r1 */
7863 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7864
7865# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7866 /* Save the frame pointer. */
7867 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7868 ARMV8_A64_REG_X2);
7869# endif
7870
7871#else
7872# error "port me"
7873#endif
7874 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7875 return off;
7876}
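/* Rough AMD64 frame layout produced by the prolog above (sketch; exact offsets
 * depend on the IEMNATIVE_FRAME_* constants and the host OS):
 *      [rbp+08h]   return address
 *      [rbp+00h]   saved rbp
 *      [rbp-08h]   saved rbx (holds pVCpu)
 *      [rbp-xxh]   saved rsi, rdi (Windows only), then r12..r15
 *      [rsp..]     variable area, stack args and (Windows) shadow arg space
 * The ARM64 variant saves x19..x28 plus BP and LR in a single block below the
 * incoming SP and points BP at the saved BP/LR pair so unwinders see a normal
 * frame, before carving out the variable area from SP. */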
7877
7878
7879/*********************************************************************************************************************************
7880* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7881*********************************************************************************************************************************/
7882
7883/**
7884 * Internal work that allocates a variable with kind set to
7885 * kIemNativeVarKind_Invalid and no current stack allocation.
7886 *
7887 * The kind will either be set by the caller or later when the variable is first
7888 * assigned a value.
7889 *
7890 * @returns Unpacked index.
7891 * @internal
7892 */
7893static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7894{
7895 Assert(cbType > 0 && cbType <= 64);
7896 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7897 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7898 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7899 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7900 pReNative->Core.aVars[idxVar].cbVar = cbType;
7901 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7902 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7903 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7904 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7905 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7906 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7907 pReNative->Core.aVars[idxVar].u.uValue = 0;
7908#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7909 pReNative->Core.aVars[idxVar].fSimdReg = false;
7910#endif
7911 return idxVar;
7912}
7913
7914
7915/**
7916 * Internal work that allocates an argument variable w/o setting enmKind.
7917 *
7918 * @returns Unpacked index.
7919 * @internal
7920 */
7921static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7922{
7923 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7924 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7925 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7926
7927 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7928 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7929 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7930 return idxVar;
7931}
7932
7933
7934/**
7935 * Gets the stack slot for a stack variable, allocating one if necessary.
7936 *
7937 * Calling this function implies that the stack slot will contain a valid
7938 * variable value. The caller deals with any register currently assigned to the
7939 * variable, typically by spilling it into the stack slot.
7940 *
7941 * @returns The stack slot number.
7942 * @param pReNative The recompiler state.
7943 * @param idxVar The variable.
7944 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7945 */
7946DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7947{
7948 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7949 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7950 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7951
7952 /* Already got a slot? */
7953 uint8_t const idxStackSlot = pVar->idxStackSlot;
7954 if (idxStackSlot != UINT8_MAX)
7955 {
7956 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7957 return idxStackSlot;
7958 }
7959
7960 /*
7961 * A single slot is easy to allocate.
7962 * Allocate them from the top end, closest to BP, to reduce the displacement.
7963 */
7964 if (pVar->cbVar <= sizeof(uint64_t))
7965 {
7966 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7967 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7968 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7969 pVar->idxStackSlot = (uint8_t)iSlot;
7970 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7971 return (uint8_t)iSlot;
7972 }
7973
7974 /*
7975 * We need more than one stack slot.
7976 *
7977 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7978 */
7979 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7980 Assert(pVar->cbVar <= 64);
7981 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7982 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7983 uint32_t bmStack = pReNative->Core.bmStack;
7984 while (bmStack != UINT32_MAX)
7985 {
7986 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7987 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7988 iSlot = (iSlot - 1) & ~fBitAlignMask;
7989 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7990 {
7991 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7992 pVar->idxStackSlot = (uint8_t)iSlot;
7993 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7994 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7995 return (uint8_t)iSlot;
7996 }
7997
7998 bmStack |= (fBitAllocMask << iSlot);
7999 }
8000 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8001}
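/* Worked example for the multi-slot path above (illustrative): for a 32 byte
 * variable (e.g. an RTUINT256U) we get
 *      fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 3
 *      fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = RT_BIT_32(4) - 1 = 0xf
 * i.e. four consecutive 8-byte slots starting at a slot index that is a
 * multiple of four, matching the 16 -> 1, 32 -> 3, 64 -> 7 table above. */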
8002
8003
8004/**
8005 * Changes the variable to a stack variable.
8006 *
8007 * Currently this is only possible to do the first time the variable is used;
8008 * switching later could be implemented but hasn't been done.
8009 *
8010 * @param pReNative The recompiler state.
8011 * @param idxVar The variable.
8012 * @throws VERR_IEM_VAR_IPE_2
8013 */
8014DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8015{
8016 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8017 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8018 if (pVar->enmKind != kIemNativeVarKind_Stack)
8019 {
8020 /* We could in theory transition from immediate to stack as well, but it
8021 would involve the caller doing work storing the value on the stack. So,
8022 till that's required we only allow transition from invalid. */
8023 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8024 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8025 pVar->enmKind = kIemNativeVarKind_Stack;
8026
8027 /* Note! We don't allocate a stack slot here, that's only done when a
8028 slot is actually needed to hold a variable value. */
8029 }
8030}
8031
8032
8033/**
8034 * Sets the variable to a constant (immediate) value.
8035 *
8036 * This does not require stack storage as we know the value and can always
8037 * reload it, unless of course it's referenced.
8038 *
8039 * @param pReNative The recompiler state.
8040 * @param idxVar The variable.
8041 * @param uValue The immediate value.
8042 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8043 */
8044DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
8045{
8046 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8047 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8048 if (pVar->enmKind != kIemNativeVarKind_Immediate)
8049 {
8050 /* Only simple transitions for now. */
8051 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8052 pVar->enmKind = kIemNativeVarKind_Immediate;
8053 }
8054 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8055
8056 pVar->u.uValue = uValue;
8057 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
8058 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
8059 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
8060}
8061
8062
8063/**
8064 * Sets the variable to a reference (pointer) to @a idxOtherVar.
8065 *
8066 * This does not require stack storage as we know the value and can always
8067 * reload it. Loading is postponed till needed.
8068 *
8069 * @param pReNative The recompiler state.
8070 * @param idxVar The variable. Unpacked.
8071 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
8072 *
8073 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8074 * @internal
8075 */
8076static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
8077{
8078 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
8079 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
8080
8081 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
8082 {
8083 /* Only simple transitions for now. */
8084 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
8085 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8086 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
8087 }
8088 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8089
8090 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
8091
8092 /* Update the other variable, ensure it's a stack variable. */
8093 /** @todo handle variables with const values... that'll go boom now. */
8094 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
8095 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8096}
8097
8098
8099/**
8100 * Sets the variable to a reference (pointer) to a guest register reference.
8101 *
8102 * This does not require stack storage as we know the value and can always
8103 * reload it. Loading is postponed till needed.
8104 *
8105 * @param pReNative The recompiler state.
8106 * @param idxVar The variable.
8107 * @param enmRegClass The class of guest registers to reference.
8108 * @param idxReg The register within @a enmRegClass to reference.
8109 *
8110 * @throws VERR_IEM_VAR_IPE_2
8111 */
8112DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8113 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
8114{
8115 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8116 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8117
8118 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
8119 {
8120 /* Only simple transitions for now. */
8121 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8122 pVar->enmKind = kIemNativeVarKind_GstRegRef;
8123 }
8124 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8125
8126 pVar->u.GstRegRef.enmClass = enmRegClass;
8127 pVar->u.GstRegRef.idx = idxReg;
8128}
8129
8130
8131DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
8132{
8133 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8134}
8135
8136
8137DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
8138{
8139 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8140
8141 /* Since we're using a generic uint64_t value type, we must truncate it if
8142 the variable is smaller, otherwise we may end up with a too large value when
8143 scaling up an imm8 w/ sign-extension.
8144
8145 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
8146 in the bios, bx=1) when running on arm, because clang expects 16-bit
8147 register parameters to have bits 16 and up set to zero. Instead of
8148 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
8149 CF value in the result. */
8150 switch (cbType)
8151 {
8152 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8153 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8154 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8155 }
8156 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8157 return idxVar;
8158}
8159
8160
8161DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
8162{
8163 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
8164 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
8165 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
8166 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
8167 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
8168 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
8169
8170 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
8171 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
8172 return idxArgVar;
8173}
8174
8175
8176DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
8177{
8178 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8179 /* Don't set to stack now, leave that to the first use as for instance
8180 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
8181 return idxVar;
8182}
8183
8184
8185DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
8186{
8187 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8188
8189 /* Since we're using a generic uint64_t value type, we must truncate it if
8190 the variable is smaller, otherwise we may end up with a too large value when
8191 scaling up an imm8 w/ sign-extension. */
8192 switch (cbType)
8193 {
8194 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8195 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8196 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8197 }
8198 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8199 return idxVar;
8200}
8201
8202
8203/**
8204 * Makes sure variable @a idxVar has a register assigned to it and that it stays
8205 * fixed till we call iemNativeVarRegisterRelease.
8206 *
8207 * @returns The host register number.
8208 * @param pReNative The recompiler state.
8209 * @param idxVar The variable.
8210 * @param poff Pointer to the instruction buffer offset.
8211 * In case a register needs to be freed up or the value
8212 * loaded off the stack.
8213 * @param fInitialized Set if the variable must already have been initialized.
8214 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8215 * the case.
8216 * @param idxRegPref Preferred register number or UINT8_MAX.
8217 */
8218DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8219 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8220{
8221 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8222 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8223 Assert(pVar->cbVar <= 8);
8224 Assert(!pVar->fRegAcquired);
8225
8226 uint8_t idxReg = pVar->idxReg;
8227 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8228 {
8229 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8230 && pVar->enmKind < kIemNativeVarKind_End);
8231 pVar->fRegAcquired = true;
8232 return idxReg;
8233 }
8234
8235 /*
8236 * If the kind of variable has not yet been set, default to 'stack'.
8237 */
8238 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8239 && pVar->enmKind < kIemNativeVarKind_End);
8240 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8241 iemNativeVarSetKindToStack(pReNative, idxVar);
8242
8243 /*
8244 * We have to allocate a register for the variable, even if it's a stack one,
8245 * as we don't know if there are modifications being made to it before it's
8246 * finalized (todo: analyze and insert hints about that?).
8247 *
8248 * If we can, we try to get the correct register for argument variables. This
8249 * assumes that most argument variables are fetched as close as possible
8250 * to the actual call, so that there aren't any interfering hidden calls
8251 * (memory accesses, etc) in between.
8252 *
8253 * If we cannot, or it's not an argument variable, we make sure no argument
8254 * registers that will be used by this MC block are allocated here, and we
8255 * always prefer non-volatile registers to avoid needing to spill stuff for
8256 * internal calls.
8257 */
8258 /** @todo Detect too early argument value fetches and warn about hidden
8259 * calls causing less optimal code to be generated in the python script. */
8260
8261 uint8_t const uArgNo = pVar->uArgNo;
8262 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
8263 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
8264 {
8265 idxReg = g_aidxIemNativeCallRegs[uArgNo];
8266
8267#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8268 /* Writeback any dirty shadow registers we are about to unshadow. */
8269 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
8270#endif
8271
8272 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8273 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
8274 }
8275 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
8276 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
8277 {
8278 /** @todo there must be a better way for this and boot cArgsX? */
8279 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8280 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
8281 & ~pReNative->Core.bmHstRegsWithGstShadow
8282 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
8283 & fNotArgsMask;
8284 if (fRegs)
8285 {
8286 /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
8287 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
8288 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
8289 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
8290 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
8291 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8292 }
8293 else
8294 {
8295 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8296 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
8297 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8298 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8299 }
8300 }
8301 else
8302 {
8303 idxReg = idxRegPref;
8304 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8305 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8306 }
8307 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8308 pVar->idxReg = idxReg;
8309
8310#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8311 pVar->fSimdReg = false;
8312#endif
8313
8314 /*
8315 * Load it off the stack if we've got a stack slot.
8316 */
8317 uint8_t const idxStackSlot = pVar->idxStackSlot;
8318 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8319 {
8320 Assert(fInitialized);
8321 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8322 switch (pVar->cbVar)
8323 {
8324 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
8325 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
8326 case 3: AssertFailed(); RT_FALL_THRU();
8327 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
8328 default: AssertFailed(); RT_FALL_THRU();
8329 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
8330 }
8331 }
8332 else
8333 {
8334 Assert(idxStackSlot == UINT8_MAX);
8335 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8336 }
8337 pVar->fRegAcquired = true;
8338 return idxReg;
8339}
8340
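/*
 * Illustrative sketch: callers are expected to pair the acquire/release API
 * above roughly like this (idxVarValue and the emitter in the middle are
 * placeholders, not taken from this file):
 *
 *     uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
 *     off = <emit native code that reads/writes the host register idxVarReg>;
 *     iemNativeVarRegisterRelease(pReNative, idxVarValue);
 */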
8341
8342#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8343/**
8344 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
8345 * fixed till we call iemNativeVarRegisterRelease.
8346 *
8347 * @returns The host register number.
8348 * @param pReNative The recompiler state.
8349 * @param idxVar The variable.
8350 * @param poff Pointer to the instruction buffer offset.
8351 * In case a register needs to be freed up or the value
8352 * loaded off the stack.
8353 * @param fInitialized Set if the variable must already have been initialized.
8354 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8355 * the case.
8356 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
8357 */
8358DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8359 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8360{
8361 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8362 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8363 Assert( pVar->cbVar == sizeof(RTUINT128U)
8364 || pVar->cbVar == sizeof(RTUINT256U));
8365 Assert(!pVar->fRegAcquired);
8366
8367 uint8_t idxReg = pVar->idxReg;
8368 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
8369 {
8370 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8371 && pVar->enmKind < kIemNativeVarKind_End);
8372 pVar->fRegAcquired = true;
8373 return idxReg;
8374 }
8375
8376 /*
8377 * If the kind of variable has not yet been set, default to 'stack'.
8378 */
8379 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8380 && pVar->enmKind < kIemNativeVarKind_End);
8381 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8382 iemNativeVarSetKindToStack(pReNative, idxVar);
8383
8384 /*
8385     * We have to allocate a register for the variable, even if it's a stack
8386     * one, as we don't know if there are modifications being made to it
8387     * before it's finalized (todo: analyze and insert hints about that?).
8388     *
8389     * If we can, we try to get the correct register for argument variables.
8390     * This assumes that most argument variables are fetched as close as
8391     * possible to the actual call, so that there aren't any interfering
8392     * hidden calls (memory accesses, etc.) in between.
8393     *
8394     * If we cannot, or it's a regular variable, we make sure no argument
8395     * registers that will be used by this MC block are allocated here, and we
8396     * always prefer non-volatile registers to avoid having to spill stuff for
8397     * internal calls.
8398 */
8399 /** @todo Detect too early argument value fetches and warn about hidden
8400 * calls causing less optimal code to be generated in the python script. */
8401
8402 uint8_t const uArgNo = pVar->uArgNo;
8403 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
8404
8405    /* SIMD is a bit simpler for now because there is no support for arguments. */
8406 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
8407 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
8408 {
8409 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8410 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
8411 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
8412 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
8413 & fNotArgsMask;
8414 if (fRegs)
8415 {
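            /* Pick from the top, mirroring the GPR variant above; ASMBitLastSetU32 is 1-based, hence the '- 1'. */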
8416 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
8417 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
8418 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
8419 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
8420 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8421 }
8422 else
8423 {
8424 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8425 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
8426 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8427 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8428 }
8429 }
8430 else
8431 {
8432 idxReg = idxRegPref;
8433 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8434 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8435 }
8436 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8437
8438 pVar->fSimdReg = true;
8439 pVar->idxReg = idxReg;
8440
8441 /*
8442 * Load it off the stack if we've got a stack slot.
8443 */
8444 uint8_t const idxStackSlot = pVar->idxStackSlot;
8445 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8446 {
8447 Assert(fInitialized);
8448 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8449 switch (pVar->cbVar)
8450 {
8451 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
8452 default: AssertFailed(); RT_FALL_THRU();
8453 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
8454 }
8455 }
8456 else
8457 {
8458 Assert(idxStackSlot == UINT8_MAX);
8459 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8460 }
8461 pVar->fRegAcquired = true;
8462 return idxReg;
8463}
8464#endif
8465
8466
8467/**
8468 * The value of variable @a idxVar will be written in full to the @a enmGstReg
8469 * guest register.
8470 *
8471 * This function makes sure there is a register for it and sets it to be the
8472 * current shadow copy of @a enmGstReg.
8473 *
8474 * @returns The host register number.
8475 * @param pReNative The recompiler state.
8476 * @param idxVar The variable.
8477 * @param enmGstReg The guest register this variable will be written to
8478 * after this call.
8479 * @param poff Pointer to the instruction buffer offset.
8480 * In case a register needs to be freed up or if the
8481 * variable content needs to be loaded off the stack.
8482 *
8483 * @note   We DO NOT expect @a idxVar to be an argument variable, because
8484 *         this function is only used in the commit stage of an
8485 *         instruction.
8486 */
8487DECL_HIDDEN_THROW(uint8_t)
8488iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
8489{
8490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8491 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8492 Assert(!pVar->fRegAcquired);
8493 AssertMsgStmt( pVar->cbVar <= 8
8494 && ( pVar->enmKind == kIemNativeVarKind_Immediate
8495 || pVar->enmKind == kIemNativeVarKind_Stack),
8496 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
8497 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
8498 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8499
8500 /*
8501 * This shouldn't ever be used for arguments, unless it's in a weird else
8502 * branch that doesn't do any calling and even then it's questionable.
8503 *
8504 * However, in case someone writes crazy wrong MC code and does register
8505 * updates before making calls, just use the regular register allocator to
8506 * ensure we get a register suitable for the intended argument number.
8507 */
8508 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
8509
8510 /*
8511 * If there is already a register for the variable, we transfer/set the
8512 * guest shadow copy assignment to it.
8513 */
8514 uint8_t idxReg = pVar->idxReg;
8515 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8516 {
8517#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8518 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
8519 {
8520# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8521 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
8522 iemNaitveDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
8523# endif
8524
8525 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
8526 }
8527#endif
8528
8529 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
8530 {
8531 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
8532 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
8533 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
8534 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
8535 }
8536 else
8537 {
8538 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
8539 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
8540 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
8541 }
8542 /** @todo figure this one out. We need some way of making sure the register isn't
8543 * modified after this point, just in case we start writing crappy MC code. */
8544 pVar->enmGstReg = enmGstReg;
8545 pVar->fRegAcquired = true;
8546 return idxReg;
8547 }
8548 Assert(pVar->uArgNo == UINT8_MAX);
8549
8550 /*
8551     * Because this is supposed to be the commit stage, we just tag along with
8552     * the temporary register allocator and upgrade it to a variable register.
8553 */
8554 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
8555 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
8556 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
8557 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
8558 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
8559 pVar->idxReg = idxReg;
8560
8561 /*
8562 * Now we need to load the register value.
8563 */
8564 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8565 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
8566 else
8567 {
8568 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8569 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8570 switch (pVar->cbVar)
8571 {
8572 case sizeof(uint64_t):
8573 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8574 break;
8575 case sizeof(uint32_t):
8576 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8577 break;
8578 case sizeof(uint16_t):
8579 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8580 break;
8581 case sizeof(uint8_t):
8582 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8583 break;
8584 default:
8585 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8586 }
8587 }
8588
8589 pVar->fRegAcquired = true;
8590 return idxReg;
8591}
8592
8593
8594/**
8595 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8596 *
8597 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8598 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8599 * requirement of flushing anything in volatile host registers when making a
8600 * call.
8601 *
8602 * @returns New @a off value.
8603 * @param pReNative The recompiler state.
8604 * @param off The code buffer position.
8605 * @param fHstRegsNotToSave Set of registers not to save & restore.
8606 */
8607DECL_HIDDEN_THROW(uint32_t)
8608iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8609{
8610 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8611 if (fHstRegs)
8612 {
8613 do
8614 {
8615 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8616 fHstRegs &= ~RT_BIT_32(idxHstReg);
8617
8618 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8619 {
8620 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8621 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8622 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8623 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8624 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8625 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8626 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8627 {
8628 case kIemNativeVarKind_Stack:
8629 {
8630 /* Temporarily spill the variable register. */
8631 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8632 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8633 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8634 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8635 continue;
8636 }
8637
8638 case kIemNativeVarKind_Immediate:
8639 case kIemNativeVarKind_VarRef:
8640 case kIemNativeVarKind_GstRegRef:
8641 /* It is weird to have any of these loaded at this point. */
8642 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8643 continue;
8644
8645 case kIemNativeVarKind_End:
8646 case kIemNativeVarKind_Invalid:
8647 break;
8648 }
8649 AssertFailed();
8650 }
8651 else
8652 {
8653 /*
8654 * Allocate a temporary stack slot and spill the register to it.
8655 */
8656 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8657 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8658 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8659 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8660 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8661 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8662 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8663 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8664 }
8665 } while (fHstRegs);
8666 }
8667#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8668
8669 /*
8670     * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot
8671     * allocated, which would be more difficult anyway due to them spanning multiple stack slots
8672     * and coming in different sizes (besides, we only have a limited number of slots at the moment).
8673 *
8674     * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
8675     * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
8676 */
8677 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
8678
8679 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8680 if (fHstRegs)
8681 {
8682 do
8683 {
8684 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8685 fHstRegs &= ~RT_BIT_32(idxHstReg);
8686
8687 /* Fixed reserved and temporary registers don't need saving. */
8688 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
8689 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
8690 continue;
8691
8692 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8693
8694 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8695 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8696 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8697 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8698 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8699 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8700 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8701 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8702 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8703 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8704 {
8705 case kIemNativeVarKind_Stack:
8706 {
8707 /* Temporarily spill the variable register. */
8708 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8709 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8710 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8711 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8712 if (cbVar == sizeof(RTUINT128U))
8713 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8714 else
8715 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8716 continue;
8717 }
8718
8719 case kIemNativeVarKind_Immediate:
8720 case kIemNativeVarKind_VarRef:
8721 case kIemNativeVarKind_GstRegRef:
8722 /* It is weird to have any of these loaded at this point. */
8723 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8724 continue;
8725
8726 case kIemNativeVarKind_End:
8727 case kIemNativeVarKind_Invalid:
8728 break;
8729 }
8730 AssertFailed();
8731 } while (fHstRegs);
8732 }
8733#endif
8734 return off;
8735}
8736
8737
8738/**
8739 * Emit code to restore volatile registers after a call to a helper.
8740 *
8741 * @returns New @a off value.
8742 * @param pReNative The recompiler state.
8743 * @param off The code buffer position.
8744 * @param fHstRegsNotToSave Set of registers not to save & restore.
8745 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8746 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8747 */
8748DECL_HIDDEN_THROW(uint32_t)
8749iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8750{
8751 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8752 if (fHstRegs)
8753 {
8754 do
8755 {
8756 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8757 fHstRegs &= ~RT_BIT_32(idxHstReg);
8758
8759 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8760 {
8761 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8762 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8763 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8764 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8765 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8766 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8767 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8768 {
8769 case kIemNativeVarKind_Stack:
8770 {
8771 /* Unspill the variable register. */
8772 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8773 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8774 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8775 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8776 continue;
8777 }
8778
8779 case kIemNativeVarKind_Immediate:
8780 case kIemNativeVarKind_VarRef:
8781 case kIemNativeVarKind_GstRegRef:
8782 /* It is weird to have any of these loaded at this point. */
8783 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8784 continue;
8785
8786 case kIemNativeVarKind_End:
8787 case kIemNativeVarKind_Invalid:
8788 break;
8789 }
8790 AssertFailed();
8791 }
8792 else
8793 {
8794 /*
8795 * Restore from temporary stack slot.
8796 */
8797 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8798 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8799 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8800 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8801
8802 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8803 }
8804 } while (fHstRegs);
8805 }
8806#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8807 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8808 if (fHstRegs)
8809 {
8810 do
8811 {
8812 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8813 fHstRegs &= ~RT_BIT_32(idxHstReg);
8814
8815 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8816 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8817 continue;
8818 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8819
8820 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8821 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8822 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8823 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8824 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8825 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8826 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8827 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8828 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8829 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8830 {
8831 case kIemNativeVarKind_Stack:
8832 {
8833 /* Unspill the variable register. */
8834 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8835 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8836 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8837 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8838
8839 if (cbVar == sizeof(RTUINT128U))
8840 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8841 else
8842 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8843 continue;
8844 }
8845
8846 case kIemNativeVarKind_Immediate:
8847 case kIemNativeVarKind_VarRef:
8848 case kIemNativeVarKind_GstRegRef:
8849 /* It is weird to have any of these loaded at this point. */
8850 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8851 continue;
8852
8853 case kIemNativeVarKind_End:
8854 case kIemNativeVarKind_Invalid:
8855 break;
8856 }
8857 AssertFailed();
8858 } while (fHstRegs);
8859 }
8860#endif
8861 return off;
8862}
8863
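/*
 * Illustrative sketch: TLB-miss code paths are expected to pair the two helpers
 * above roughly like this (the call emitter and helper pointer below are
 * placeholders, not taken from this file):
 *
 *     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHlp);
 *     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 *
 * with iemNativeRegRestoreGuestShadowsInVolatileRegs() optionally used afterwards
 * to re-establish guest shadow copies in the volatile registers.
 */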
8864
8865/**
8866 * Worker that frees the stack slots for variable @a idxVar if any are allocated.
8867 *
8868 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8869 *
8870 * ASSUMES that @a idxVar is valid and unpacked.
8871 */
8872DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8873{
8874 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8875 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8876 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8877 {
8878 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8879 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8880 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
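        /* E.g. a 32 byte variable (RTUINT256U): cSlots = 4 and fAllocMask = 0xf, i.e. four 8 byte stack slots starting at idxStackSlot. */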
8881 Assert(cSlots > 0);
8882 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8883 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8884 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8885 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8886 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8887 }
8888 else
8889 Assert(idxStackSlot == UINT8_MAX);
8890}
8891
8892
8893/**
8894 * Worker that frees a single variable.
8895 *
8896 * ASSUMES that @a idxVar is valid and unpacked.
8897 */
8898DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8899{
8900 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8901 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8902 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8903
8904 /* Free the host register first if any assigned. */
8905 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8906#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8907 if ( idxHstReg != UINT8_MAX
8908 && pReNative->Core.aVars[idxVar].fSimdReg)
8909 {
8910 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8911 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8912 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8913 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8914 }
8915 else
8916#endif
8917 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8918 {
8919 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8920 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8921 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8922 }
8923
8924 /* Free argument mapping. */
8925 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8926 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8927 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8928
8929 /* Free the stack slots. */
8930 iemNativeVarFreeStackSlots(pReNative, idxVar);
8931
8932 /* Free the actual variable. */
8933 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8934 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8935}
8936
8937
8938/**
8939 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8940 */
8941DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8942{
8943 while (bmVars != 0)
8944 {
8945 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8946 bmVars &= ~RT_BIT_32(idxVar);
8947
8948#if 1 /** @todo optimize by simplifying this later... */
8949 iemNativeVarFreeOneWorker(pReNative, idxVar);
8950#else
8951 /* Only need to free the host register, the rest is done as bulk updates below. */
8952 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8953 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8954 {
8955 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8956 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8957 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8958 }
8959#endif
8960 }
8961#if 0 /** @todo optimize by simplifying this later... */
8962 pReNative->Core.bmVars = 0;
8963 pReNative->Core.bmStack = 0;
8964 pReNative->Core.u64ArgVars = UINT64_MAX;
8965#endif
8966}
8967
8968
8969
8970/*********************************************************************************************************************************
8971* Emitters for IEM_MC_CALL_CIMPL_XXX *
8972*********************************************************************************************************************************/
8973
8974/**
8975 * Emits code to load a reference to the given guest register into @a idxGprDst.
8976 */
8977DECL_HIDDEN_THROW(uint32_t)
8978iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8979 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8980{
8981#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8982    /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
8983#endif
8984
8985 /*
8986 * Get the offset relative to the CPUMCTX structure.
8987 */
8988 uint32_t offCpumCtx;
8989 switch (enmClass)
8990 {
8991 case kIemNativeGstRegRef_Gpr:
8992 Assert(idxRegInClass < 16);
8993 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8994 break;
8995
8996        case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8997 Assert(idxRegInClass < 4);
8998 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8999 break;
9000
9001 case kIemNativeGstRegRef_EFlags:
9002 Assert(idxRegInClass == 0);
9003 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
9004 break;
9005
9006 case kIemNativeGstRegRef_MxCsr:
9007 Assert(idxRegInClass == 0);
9008 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
9009 break;
9010
9011 case kIemNativeGstRegRef_FpuReg:
9012 Assert(idxRegInClass < 8);
9013 AssertFailed(); /** @todo what kind of indexing? */
9014 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
9015 break;
9016
9017 case kIemNativeGstRegRef_MReg:
9018 Assert(idxRegInClass < 8);
9019 AssertFailed(); /** @todo what kind of indexing? */
9020 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
9021 break;
9022
9023 case kIemNativeGstRegRef_XReg:
9024 Assert(idxRegInClass < 16);
9025 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
9026 break;
9027
9028 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
9029 Assert(idxRegInClass == 0);
9030 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
9031 break;
9032
9033 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
9034 Assert(idxRegInClass == 0);
9035 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
9036 break;
9037
9038 default:
9039 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
9040 }
9041
9042 /*
9043 * Load the value into the destination register.
9044 */
9045#ifdef RT_ARCH_AMD64
9046 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
9047
9048#elif defined(RT_ARCH_ARM64)
9049 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9050 Assert(offCpumCtx < 4096);
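    /* Note: the A64 ADD (immediate) instruction only encodes a 12-bit unsigned immediate, hence the 4096 limit asserted above. */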
9051 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
9052
9053#else
9054# error "Port me!"
9055#endif
9056
9057 return off;
9058}
9059
9060
9061/**
9062 * Common code for CIMPL and AIMPL calls.
9063 *
9064 * These are calls that use argument variables and such.  They should not be
9065 * confused with internal calls required to implement an MC operation,
9066 * like a TLB load and similar.
9067 *
9068 * Upon return all that is left to do is to load any hidden arguments and
9069 * perform the call. All argument variables are freed.
9070 *
9071 * @returns New code buffer offset; throws VBox status code on error.
9072 * @param pReNative The native recompile state.
9073 * @param off The code buffer offset.
9074 * @param   cArgs           The total number of arguments (includes hidden
9075 * count).
9076 * @param cHiddenArgs The number of hidden arguments. The hidden
9077 * arguments must not have any variable declared for
9078 * them, whereas all the regular arguments must
9079 * (tstIEMCheckMc ensures this).
9080 */
9081DECL_HIDDEN_THROW(uint32_t)
9082iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
9083{
9084#ifdef VBOX_STRICT
9085 /*
9086 * Assert sanity.
9087 */
9088 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
9089 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
9090 for (unsigned i = 0; i < cHiddenArgs; i++)
9091 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
9092 for (unsigned i = cHiddenArgs; i < cArgs; i++)
9093 {
9094 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
9095 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
9096 }
9097 iemNativeRegAssertSanity(pReNative);
9098#endif
9099
9100 /* We don't know what the called function makes use of, so flush any pending register writes. */
9101 off = iemNativeRegFlushPendingWrites(pReNative, off);
9102
9103 /*
9104 * Before we do anything else, go over variables that are referenced and
9105 * make sure they are not in a register.
9106 */
9107 uint32_t bmVars = pReNative->Core.bmVars;
9108 if (bmVars)
9109 {
9110 do
9111 {
9112 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9113 bmVars &= ~RT_BIT_32(idxVar);
9114
9115 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
9116 {
9117 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
9118#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9119 if ( idxRegOld != UINT8_MAX
9120 && pReNative->Core.aVars[idxVar].fSimdReg)
9121 {
9122 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9123 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
9124
9125 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
9126 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
9127 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
9128 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9129 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
9130 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9131 else
9132 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9133
9134 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
9135 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
9136
9137 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9138 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
9139 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9140 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
9141 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
9142 }
9143 else
9144#endif
9145 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
9146 {
9147 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
9148 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
9149 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
9150 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9151 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9152
9153 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9154 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
9155 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9156 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
9157 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
9158 }
9159 }
9160 } while (bmVars != 0);
9161#if 0 //def VBOX_STRICT
9162 iemNativeRegAssertSanity(pReNative);
9163#endif
9164 }
9165
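    /* Number of arguments passed in registers; any remainder goes on the stack (see IEMNATIVE_FP_OFF_STACK_ARG0 below). */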
9166 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
9167
9168 /*
9169 * First, go over the host registers that will be used for arguments and make
9170 * sure they either hold the desired argument or are free.
9171 */
9172 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
9173 {
9174 for (uint32_t i = 0; i < cRegArgs; i++)
9175 {
9176 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9177 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9178 {
9179 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
9180 {
9181 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
9182 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9183 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9184 Assert(pVar->idxReg == idxArgReg);
9185 uint8_t const uArgNo = pVar->uArgNo;
9186 if (uArgNo == i)
9187                { /* perfect */ }
9188 /* The variable allocator logic should make sure this is impossible,
9189 except for when the return register is used as a parameter (ARM,
9190 but not x86). */
9191#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
9192 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
9193 {
9194# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9195# error "Implement this"
9196# endif
9197 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
9198 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
9199 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
9200 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9201 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
9202 }
9203#endif
9204 else
9205 {
9206 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9207
9208 if (pVar->enmKind == kIemNativeVarKind_Stack)
9209 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
9210 else
9211 {
9212 /* just free it, can be reloaded if used again */
9213 pVar->idxReg = UINT8_MAX;
9214 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
9215 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
9216 }
9217 }
9218 }
9219 else
9220 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
9221 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
9222 }
9223 }
9224#if 0 //def VBOX_STRICT
9225 iemNativeRegAssertSanity(pReNative);
9226#endif
9227 }
9228
9229 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
9230
9231#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9232 /*
9233 * If there are any stack arguments, make sure they are in their place as well.
9234 *
9235     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
9236     * the caller) will be loading it later and it must be free (see the first loop).
9237 */
9238 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
9239 {
9240 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
9241 {
9242 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9243 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
9244 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9245 {
9246 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
9247 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
9248 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
9249 pVar->idxReg = UINT8_MAX;
9250 }
9251 else
9252 {
9253 /* Use ARG0 as temp for stuff we need registers for. */
9254 switch (pVar->enmKind)
9255 {
9256 case kIemNativeVarKind_Stack:
9257 {
9258 uint8_t const idxStackSlot = pVar->idxStackSlot;
9259 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9260 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
9261 iemNativeStackCalcBpDisp(idxStackSlot));
9262 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9263 continue;
9264 }
9265
9266 case kIemNativeVarKind_Immediate:
9267 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
9268 continue;
9269
9270 case kIemNativeVarKind_VarRef:
9271 {
9272 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9273 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9274 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9275 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9276 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9277# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9278 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
9279 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
9280 if ( fSimdReg
9281 && idxRegOther != UINT8_MAX)
9282 {
9283 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9284 if (cbVar == sizeof(RTUINT128U))
9285 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
9286 else
9287 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
9288 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9289 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9290 }
9291 else
9292# endif
9293 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9294 {
9295 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9296 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9297 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9298 }
9299 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9300 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9301 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
9302 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9303 continue;
9304 }
9305
9306 case kIemNativeVarKind_GstRegRef:
9307 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
9308 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9309 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9310 continue;
9311
9312 case kIemNativeVarKind_Invalid:
9313 case kIemNativeVarKind_End:
9314 break;
9315 }
9316 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9317 }
9318 }
9319# if 0 //def VBOX_STRICT
9320 iemNativeRegAssertSanity(pReNative);
9321# endif
9322 }
9323#else
9324 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
9325#endif
9326
9327 /*
9328 * Make sure the argument variables are loaded into their respective registers.
9329 *
9330 * We can optimize this by ASSUMING that any register allocations are for
9331     * registers that have already been loaded and are ready.  The previous step
9332 * saw to that.
9333 */
9334 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
9335 {
9336 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9337 {
9338 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9339 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9340 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
9341 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
9342 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
9343 else
9344 {
9345 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9346 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9347 {
9348 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9349 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
9350 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
9351 | RT_BIT_32(idxArgReg);
9352 pVar->idxReg = idxArgReg;
9353 }
9354 else
9355 {
9356 /* Use ARG0 as temp for stuff we need registers for. */
9357 switch (pVar->enmKind)
9358 {
9359 case kIemNativeVarKind_Stack:
9360 {
9361 uint8_t const idxStackSlot = pVar->idxStackSlot;
9362 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9363 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
9364 continue;
9365 }
9366
9367 case kIemNativeVarKind_Immediate:
9368 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
9369 continue;
9370
9371 case kIemNativeVarKind_VarRef:
9372 {
9373 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9374 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9375 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
9376 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9377 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9378 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9379#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9380 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
9381 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
9382 if ( fSimdReg
9383 && idxRegOther != UINT8_MAX)
9384 {
9385 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9386 if (cbVar == sizeof(RTUINT128U))
9387 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
9388 else
9389 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
9390 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9391 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9392 }
9393 else
9394#endif
9395 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9396 {
9397 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9398 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9399 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9400 }
9401 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9402 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9403 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
9404 continue;
9405 }
9406
9407 case kIemNativeVarKind_GstRegRef:
9408 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
9409 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9410 continue;
9411
9412 case kIemNativeVarKind_Invalid:
9413 case kIemNativeVarKind_End:
9414 break;
9415 }
9416 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9417 }
9418 }
9419 }
9420#if 0 //def VBOX_STRICT
9421 iemNativeRegAssertSanity(pReNative);
9422#endif
9423 }
9424#ifdef VBOX_STRICT
9425 else
9426 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9427 {
9428 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
9429 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
9430 }
9431#endif
9432
9433 /*
9434 * Free all argument variables (simplified).
9435 * Their lifetime always expires with the call they are for.
9436 */
9437 /** @todo Make the python script check that arguments aren't used after
9438 * IEM_MC_CALL_XXXX. */
9439    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
9440     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
9441     *        an argument value. There is also some FPU stuff. */
9442 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
9443 {
9444 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
9445 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
9446
9447 /* no need to free registers: */
9448 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
9449 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
9450 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
9451 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
9452 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
9453 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
9454
9455 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
9456 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9457 iemNativeVarFreeStackSlots(pReNative, idxVar);
9458 }
9459 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9460
9461 /*
9462 * Flush volatile registers as we make the call.
9463 */
9464 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
9465
9466 return off;
9467}
9468
9469
9470
9471/*********************************************************************************************************************************
9472* TLB Lookup. *
9473*********************************************************************************************************************************/
9474
9475/**
9476 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
9477 */
9478DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
9479{
9480 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
9481 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
9482 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
9483 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
9484
9485 /* Do the lookup manually. */
9486 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
9487 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
9488 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
9489 if (RT_LIKELY(pTlbe->uTag == uTag))
9490 {
9491 /*
9492 * Check TLB page table level access flags.
9493 */
9494 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
9495 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
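        /* I.e. CPL 3 yields (3 + 1) & 4 = IEMTLBE_F_PT_NO_USER, so user mode gets the NO_USER check; CPL 0..2 yield 0. */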
9496 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
9497 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
9498 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
9499 | IEMTLBE_F_PG_UNASSIGNED
9500 | IEMTLBE_F_PT_NO_ACCESSED
9501 | fNoWriteNoDirty | fNoUser);
9502 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
9503 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
9504 {
9505 /*
9506 * Return the address.
9507 */
9508 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
9509 if ((uintptr_t)pbAddr == uResult)
9510 return;
9511 RT_NOREF(cbMem);
9512 AssertFailed();
9513 }
9514 else
9515 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
9516 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
9517 }
9518 else
9519 AssertFailed();
9520 RT_BREAKPOINT();
9521}
9522
9523/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
9524
9525
9526
9527/*********************************************************************************************************************************
9528* Recompiler Core. *
9529*********************************************************************************************************************************/
9530
9531/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
9532static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
9533{
9534 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
9535 pDis->cbCachedInstr += cbMaxRead;
9536 RT_NOREF(cbMinRead);
9537 return VERR_NO_DATA;
9538}
9539
9540
9541DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
9542{
9543 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
9544 {
9545#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
9546 ENTRY(fLocalForcedActions),
9547 ENTRY(iem.s.rcPassUp),
9548 ENTRY(iem.s.fExec),
9549 ENTRY(iem.s.pbInstrBuf),
9550 ENTRY(iem.s.uInstrBufPc),
9551 ENTRY(iem.s.GCPhysInstrBuf),
9552 ENTRY(iem.s.cbInstrBufTotal),
9553 ENTRY(iem.s.idxTbCurInstr),
9554#ifdef VBOX_WITH_STATISTICS
9555 ENTRY(iem.s.StatNativeTlbHitsForFetch),
9556 ENTRY(iem.s.StatNativeTlbHitsForStore),
9557 ENTRY(iem.s.StatNativeTlbHitsForStack),
9558 ENTRY(iem.s.StatNativeTlbHitsForMapped),
9559 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
9560 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
9561 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
9562 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
9563#endif
9564 ENTRY(iem.s.DataTlb.aEntries),
9565 ENTRY(iem.s.DataTlb.uTlbRevision),
9566 ENTRY(iem.s.DataTlb.uTlbPhysRev),
9567 ENTRY(iem.s.DataTlb.cTlbHits),
9568 ENTRY(iem.s.CodeTlb.aEntries),
9569 ENTRY(iem.s.CodeTlb.uTlbRevision),
9570 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
9571 ENTRY(iem.s.CodeTlb.cTlbHits),
9572 ENTRY(pVMR3),
9573 ENTRY(cpum.GstCtx.rax),
9574 ENTRY(cpum.GstCtx.ah),
9575 ENTRY(cpum.GstCtx.rcx),
9576 ENTRY(cpum.GstCtx.ch),
9577 ENTRY(cpum.GstCtx.rdx),
9578 ENTRY(cpum.GstCtx.dh),
9579 ENTRY(cpum.GstCtx.rbx),
9580 ENTRY(cpum.GstCtx.bh),
9581 ENTRY(cpum.GstCtx.rsp),
9582 ENTRY(cpum.GstCtx.rbp),
9583 ENTRY(cpum.GstCtx.rsi),
9584 ENTRY(cpum.GstCtx.rdi),
9585 ENTRY(cpum.GstCtx.r8),
9586 ENTRY(cpum.GstCtx.r9),
9587 ENTRY(cpum.GstCtx.r10),
9588 ENTRY(cpum.GstCtx.r11),
9589 ENTRY(cpum.GstCtx.r12),
9590 ENTRY(cpum.GstCtx.r13),
9591 ENTRY(cpum.GstCtx.r14),
9592 ENTRY(cpum.GstCtx.r15),
9593 ENTRY(cpum.GstCtx.es.Sel),
9594 ENTRY(cpum.GstCtx.es.u64Base),
9595 ENTRY(cpum.GstCtx.es.u32Limit),
9596 ENTRY(cpum.GstCtx.es.Attr),
9597 ENTRY(cpum.GstCtx.cs.Sel),
9598 ENTRY(cpum.GstCtx.cs.u64Base),
9599 ENTRY(cpum.GstCtx.cs.u32Limit),
9600 ENTRY(cpum.GstCtx.cs.Attr),
9601 ENTRY(cpum.GstCtx.ss.Sel),
9602 ENTRY(cpum.GstCtx.ss.u64Base),
9603 ENTRY(cpum.GstCtx.ss.u32Limit),
9604 ENTRY(cpum.GstCtx.ss.Attr),
9605 ENTRY(cpum.GstCtx.ds.Sel),
9606 ENTRY(cpum.GstCtx.ds.u64Base),
9607 ENTRY(cpum.GstCtx.ds.u32Limit),
9608 ENTRY(cpum.GstCtx.ds.Attr),
9609 ENTRY(cpum.GstCtx.fs.Sel),
9610 ENTRY(cpum.GstCtx.fs.u64Base),
9611 ENTRY(cpum.GstCtx.fs.u32Limit),
9612 ENTRY(cpum.GstCtx.fs.Attr),
9613 ENTRY(cpum.GstCtx.gs.Sel),
9614 ENTRY(cpum.GstCtx.gs.u64Base),
9615 ENTRY(cpum.GstCtx.gs.u32Limit),
9616 ENTRY(cpum.GstCtx.gs.Attr),
9617 ENTRY(cpum.GstCtx.rip),
9618 ENTRY(cpum.GstCtx.eflags),
9619 ENTRY(cpum.GstCtx.uRipInhibitInt),
9620 ENTRY(cpum.GstCtx.cr0),
9621 ENTRY(cpum.GstCtx.cr4),
9622 ENTRY(cpum.GstCtx.aXcr[0]),
9623 ENTRY(cpum.GstCtx.aXcr[1]),
9624#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9625 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
9626 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
9627 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
9628 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
9629 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
9630 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
9631 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
9632 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
9633 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
9634 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
9635 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
9636 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
9637 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
9638 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
9639 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
9640 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
9641 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
9642 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
9643 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
9644 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
9645 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
9646 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
9647 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
9648 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
9649 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
9650 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
9651 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
9652 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
9653 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
9654 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
9655 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
9656 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
9657#endif
9658#undef ENTRY
9659 };
9660#ifdef VBOX_STRICT
9661 static bool s_fOrderChecked = false;
9662 if (!s_fOrderChecked)
9663 {
9664 s_fOrderChecked = true;
9665 uint32_t offPrev = s_aMembers[0].off;
9666 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
9667 {
9668 Assert(s_aMembers[i].off > offPrev);
9669 offPrev = s_aMembers[i].off;
9670 }
9671 }
9672#endif
9673
9674 /*
9675 * Binary lookup.
9676 */
9677 unsigned iStart = 0;
9678 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9679 for (;;)
9680 {
9681 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9682 uint32_t const offCur = s_aMembers[iCur].off;
9683 if (off < offCur)
9684 {
9685 if (iCur != iStart)
9686 iEnd = iCur;
9687 else
9688 break;
9689 }
9690 else if (off > offCur)
9691 {
9692 if (iCur + 1 < iEnd)
9693 iStart = iCur + 1;
9694 else
9695 break;
9696 }
9697 else
9698 return s_aMembers[iCur].pszName;
9699 }
9700#ifdef VBOX_WITH_STATISTICS
9701 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9702 return "iem.s.acThreadedFuncStats[iFn]";
9703#endif
9704 return NULL;
9705}
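/* Note: a NULL return above simply means the offset doesn't hit a known VMCPU
   member; the disassembly annotation code below then just omits the comment. */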
9706
9707
9708DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9709{
9710 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9711#if defined(RT_ARCH_AMD64)
9712 static const char * const a_apszMarkers[] =
9713 {
9714 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9715 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9716 };
9717#endif
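    /* Note: these marker strings pair with the iemNativeEmitMarker decoding further
       down: a high word below kIemThreadedFunc_End denotes a threaded call (bits
       0..14 = call index, bit 15 = recompiled), otherwise the value (ignoring bit 31)
       indexes a_apszMarkers above. */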
9718
9719 char szDisBuf[512];
9720 DISSTATE Dis;
9721 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9722 uint32_t const cNative = pTb->Native.cInstructions;
9723 uint32_t offNative = 0;
9724#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9725 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9726#endif
9727 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9728 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9729 : DISCPUMODE_64BIT;
9730#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9731 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9732#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9733 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9734#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9735# error "Port me"
9736#else
9737 csh hDisasm = ~(size_t)0;
9738# if defined(RT_ARCH_AMD64)
9739 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9740# elif defined(RT_ARCH_ARM64)
9741 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9742# else
9743# error "Port me"
9744# endif
9745 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9746
9747 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9748 //Assert(rcCs == CS_ERR_OK);
9749#endif
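    /* Everything below disassembles host code either with the builtin DIS API or,
       when VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER is defined, with capstone via
       hDisasm. */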
9750
9751 /*
9752 * Print TB info.
9753 */
9754 pHlp->pfnPrintf(pHlp,
9755 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9756 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9757 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9758 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9759#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9760 if (pDbgInfo && pDbgInfo->cEntries > 1)
9761 {
9762 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9763
9764 /*
9765 * This disassembly is driven by the debug info which follows the native
9766 * code and indicates when it starts with the next guest instructions,
9767 * where labels are and such things.
9768 */
9769 uint32_t idxThreadedCall = 0;
9770 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9771 uint8_t idxRange = UINT8_MAX;
9772 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9773 uint32_t offRange = 0;
9774 uint32_t offOpcodes = 0;
9775 uint32_t const cbOpcodes = pTb->cbOpcodes;
9776 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9777 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9778 uint32_t iDbgEntry = 1;
9779 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9780
9781 while (offNative < cNative)
9782 {
9783 /* If we're at or have passed the point where the next chunk of debug
9784 info starts, process it. */
9785 if (offDbgNativeNext <= offNative)
9786 {
9787 offDbgNativeNext = UINT32_MAX;
9788 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9789 {
9790 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9791 {
9792 case kIemTbDbgEntryType_GuestInstruction:
9793 {
9794 /* Did the exec flag change? */
9795 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9796 {
9797 pHlp->pfnPrintf(pHlp,
9798 " fExec change %#08x -> %#08x %s\n",
9799 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9800 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9801 szDisBuf, sizeof(szDisBuf)));
9802 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9803 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9804 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9805 : DISCPUMODE_64BIT;
9806 }
9807
9808                            /* New opcode range? We need to fend off a spurious debug info entry here for cases
9809 where the compilation was aborted before the opcode was recorded and the actual
9810 instruction was translated to a threaded call. This may happen when we run out
9811 of ranges, or when some complicated interrupts/FFs are found to be pending or
9812 similar. So, we just deal with it here rather than in the compiler code as it
9813 is a lot simpler to do here. */
9814 if ( idxRange == UINT8_MAX
9815 || idxRange >= cRanges
9816 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9817 {
9818 idxRange += 1;
9819 if (idxRange < cRanges)
9820 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9821 else
9822 continue;
9823 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9824 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9825 + (pTb->aRanges[idxRange].idxPhysPage == 0
9826 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9827 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9828 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9829 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9830 pTb->aRanges[idxRange].idxPhysPage);
9831 GCPhysPc += offRange;
9832 }
9833
9834 /* Disassemble the instruction. */
9835 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9836 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9837 uint32_t cbInstr = 1;
9838 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9839 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9840 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9841 if (RT_SUCCESS(rc))
9842 {
9843 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9844 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9845 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9846 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9847
9848 static unsigned const s_offMarker = 55;
9849 static char const s_szMarker[] = " ; <--- guest";
9850 if (cch < s_offMarker)
9851 {
9852 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9853 cch = s_offMarker;
9854 }
9855 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9856 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9857
9858 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9859 }
9860 else
9861 {
9862 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9863 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9864 cbInstr = 1;
9865 }
9866 GCPhysPc += cbInstr;
9867 offOpcodes += cbInstr;
9868 offRange += cbInstr;
9869 continue;
9870 }
9871
9872 case kIemTbDbgEntryType_ThreadedCall:
9873 pHlp->pfnPrintf(pHlp,
9874 " Call #%u to %s (%u args) - %s\n",
9875 idxThreadedCall,
9876 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9877 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9878 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9879 idxThreadedCall++;
9880 continue;
9881
9882 case kIemTbDbgEntryType_GuestRegShadowing:
9883 {
9884 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9885 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9886 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9887 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9888 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9889 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9890 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9891 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9892 else
9893 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9894 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9895 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9896 continue;
9897 }
9898
9899#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9900 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9901 {
9902 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9903 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9904 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9905 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9906 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9907 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9908 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9909 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9910 else
9911 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9912 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9913 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9914 continue;
9915 }
9916#endif
9917
9918 case kIemTbDbgEntryType_Label:
9919 {
9920 const char *pszName = "what_the_fudge";
9921 const char *pszComment = "";
9922 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9923 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9924 {
9925 case kIemNativeLabelType_Return: pszName = "Return"; break;
9926 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9927 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9928 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9929 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9930 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9931 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9932 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9933 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
9934 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9935 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9936 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9937 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9938 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9939 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9940 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9941 case kIemNativeLabelType_If:
9942 pszName = "If";
9943 fNumbered = true;
9944 break;
9945 case kIemNativeLabelType_Else:
9946 pszName = "Else";
9947 fNumbered = true;
9948 pszComment = " ; regs state restored pre-if-block";
9949 break;
9950 case kIemNativeLabelType_Endif:
9951 pszName = "Endif";
9952 fNumbered = true;
9953 break;
9954 case kIemNativeLabelType_CheckIrq:
9955 pszName = "CheckIrq_CheckVM";
9956 fNumbered = true;
9957 break;
9958 case kIemNativeLabelType_TlbLookup:
9959 pszName = "TlbLookup";
9960 fNumbered = true;
9961 break;
9962 case kIemNativeLabelType_TlbMiss:
9963 pszName = "TlbMiss";
9964 fNumbered = true;
9965 break;
9966 case kIemNativeLabelType_TlbDone:
9967 pszName = "TlbDone";
9968 fNumbered = true;
9969 break;
9970 case kIemNativeLabelType_Invalid:
9971 case kIemNativeLabelType_End:
9972 break;
9973 }
9974 if (fNumbered)
9975 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9976 else
9977 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9978 continue;
9979 }
9980
9981 case kIemTbDbgEntryType_NativeOffset:
9982 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9983 Assert(offDbgNativeNext > offNative);
9984 break;
9985
9986#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9987 case kIemTbDbgEntryType_DelayedPcUpdate:
9988 pHlp->pfnPrintf(pHlp,
9989 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9990 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9991 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9992 continue;
9993#endif
9994
9995#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9996 case kIemTbDbgEntryType_GuestRegDirty:
9997 {
9998 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9999 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
10000 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
10001 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
10002 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
10003 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
10004 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
10005 pHlp->pfnPrintf(pHlp,
10006 " Guest register %s (shadowed by %s) is now dirty\n",
10007 pszGstReg, pszHstReg);
10008 continue;
10009 }
10010
10011 case kIemTbDbgEntryType_GuestRegWriteback:
10012 pHlp->pfnPrintf(pHlp,
10013 " Writing dirty %s registers (gst %#RX64)\n",
10014 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
10015 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg);
10016 continue;
10017#endif
10018
10019 default:
10020 AssertFailed();
10021 }
10022 iDbgEntry++;
10023 break;
10024 }
10025 }
10026
10027 /*
10028 * Disassemble the next native instruction.
10029 */
10030 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10031# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10032 uint32_t cbInstr = sizeof(paNative[0]);
10033 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10034 if (RT_SUCCESS(rc))
10035 {
10036# if defined(RT_ARCH_AMD64)
10037 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10038 {
10039 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10040 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10041 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
10042 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10043 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10044 uInfo & 0x8000 ? "recompiled" : "todo");
10045 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
10046 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
10047 else
10048 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10049 }
10050 else
10051# endif
10052 {
10053 const char *pszAnnotation = NULL;
10054# ifdef RT_ARCH_AMD64
10055 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10056 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10057 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10058 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10059 PCDISOPPARAM pMemOp;
10060 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
10061 pMemOp = &Dis.Param1;
10062 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
10063 pMemOp = &Dis.Param2;
10064 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
10065 pMemOp = &Dis.Param3;
10066 else
10067 pMemOp = NULL;
10068 if ( pMemOp
10069 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
10070 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
10071 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
10072 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
10073
10074#elif defined(RT_ARCH_ARM64)
10075 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10076 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10077 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10078# else
10079# error "Port me"
10080# endif
10081 if (pszAnnotation)
10082 {
10083 static unsigned const s_offAnnotation = 55;
10084 size_t const cchAnnotation = strlen(pszAnnotation);
10085 size_t cchDis = strlen(szDisBuf);
10086 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
10087 {
10088 if (cchDis < s_offAnnotation)
10089 {
10090 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
10091 cchDis = s_offAnnotation;
10092 }
10093 szDisBuf[cchDis++] = ' ';
10094 szDisBuf[cchDis++] = ';';
10095 szDisBuf[cchDis++] = ' ';
10096 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
10097 }
10098 }
10099 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10100 }
10101 }
10102 else
10103 {
10104# if defined(RT_ARCH_AMD64)
10105 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10106 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10107# elif defined(RT_ARCH_ARM64)
10108 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10109# else
10110# error "Port me"
10111# endif
10112 cbInstr = sizeof(paNative[0]);
10113 }
10114 offNative += cbInstr / sizeof(paNative[0]);
10115
10116# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10117 cs_insn *pInstr;
10118 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10119 (uintptr_t)pNativeCur, 1, &pInstr);
10120 if (cInstrs > 0)
10121 {
10122 Assert(cInstrs == 1);
10123 const char *pszAnnotation = NULL;
10124# if defined(RT_ARCH_ARM64)
10125 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
10126 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
10127 {
10128                    /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
10129 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
10130 char *psz = strchr(pInstr->op_str, '[');
10131 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
10132 {
10133                        uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
10134 int32_t off = -1;
10135 psz += 4;
10136 if (*psz == ']')
10137 off = 0;
10138 else if (*psz == ',')
10139 {
10140 psz = RTStrStripL(psz + 1);
10141 if (*psz == '#')
10142 off = RTStrToInt32(&psz[1]);
10143 /** @todo deal with index registers and LSL as well... */
10144 }
10145 if (off >= 0)
10146 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
10147 }
10148 }
10149# endif
10150
10151 size_t const cchOp = strlen(pInstr->op_str);
10152# if defined(RT_ARCH_AMD64)
10153 if (pszAnnotation)
10154 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
10155 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
10156 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
10157 else
10158 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10159 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10160
10161# else
10162 if (pszAnnotation)
10163 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
10164 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
10165 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
10166 else
10167 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10168 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10169# endif
10170 offNative += pInstr->size / sizeof(*pNativeCur);
10171 cs_free(pInstr, cInstrs);
10172 }
10173 else
10174 {
10175# if defined(RT_ARCH_AMD64)
10176 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
10177                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
10178# else
10179 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10180# endif
10181 offNative++;
10182 }
10183# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10184 }
10185 }
10186 else
10187#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
10188 {
10189 /*
10190 * No debug info, just disassemble the x86 code and then the native code.
10191 *
10192 * First the guest code:
10193 */
10194 for (unsigned i = 0; i < pTb->cRanges; i++)
10195 {
10196 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
10197 + (pTb->aRanges[i].idxPhysPage == 0
10198 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
10199 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
10200 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
10201 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
10202 unsigned off = pTb->aRanges[i].offOpcodes;
10203 /** @todo this ain't working when crossing pages! */
10204 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
10205 while (off < cbOpcodes)
10206 {
10207 uint32_t cbInstr = 1;
10208 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
10209 &pTb->pabOpcodes[off], cbOpcodes - off,
10210 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
10211 if (RT_SUCCESS(rc))
10212 {
10213 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10214 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10215 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10216 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10217 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
10218 GCPhysPc += cbInstr;
10219 off += cbInstr;
10220 }
10221 else
10222 {
10223 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
10224 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
10225 break;
10226 }
10227 }
10228 }
10229
10230 /*
10231 * Then the native code:
10232 */
10233 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
10234 while (offNative < cNative)
10235 {
10236 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10237# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10238 uint32_t cbInstr = sizeof(paNative[0]);
10239 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10240 if (RT_SUCCESS(rc))
10241 {
10242# if defined(RT_ARCH_AMD64)
10243 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10244 {
10245 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10246 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10247 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
10248 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10249 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10250 uInfo & 0x8000 ? "recompiled" : "todo");
10251 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
10252 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
10253 else
10254 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10255 }
10256 else
10257# endif
10258 {
10259# ifdef RT_ARCH_AMD64
10260 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10261 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10262 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10263 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10264# elif defined(RT_ARCH_ARM64)
10265 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10266 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10267 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10268# else
10269# error "Port me"
10270# endif
10271 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10272 }
10273 }
10274 else
10275 {
10276# if defined(RT_ARCH_AMD64)
10277 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10278 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10279# else
10280 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10281# endif
10282 cbInstr = sizeof(paNative[0]);
10283 }
10284 offNative += cbInstr / sizeof(paNative[0]);
10285
10286# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10287 cs_insn *pInstr;
10288 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10289 (uintptr_t)pNativeCur, 1, &pInstr);
10290 if (cInstrs > 0)
10291 {
10292 Assert(cInstrs == 1);
10293# if defined(RT_ARCH_AMD64)
10294 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10295 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10296# else
10297 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10298 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10299# endif
10300 offNative += pInstr->size / sizeof(*pNativeCur);
10301 cs_free(pInstr, cInstrs);
10302 }
10303 else
10304 {
10305# if defined(RT_ARCH_AMD64)
10306 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
10307                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
10308# else
10309 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10310# endif
10311 offNative++;
10312 }
10313# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10314 }
10315 }
10316
10317#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10318 /* Cleanup. */
10319 cs_close(&hDisasm);
10320#endif
10321}
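/*
 * Illustrative usage sketch (not part of the build): dumping a finished native TB
 * to the debug or release log could look roughly like this, assuming a valid pTb:
 *
 *      if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
 *      {
 *          iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());       // debug log
 *          //iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());  // release log
 *      }
 */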
10322
10323
10324/**
10325 * Recompiles the given threaded TB into a native one.
10326 *
10327 * In case of failure the translation block will be returned as-is.
10328 *
10329 * @returns pTb.
10330 * @param pVCpu The cross context virtual CPU structure of the calling
10331 * thread.
10332 * @param   pTb     The threaded translation block to recompile to native.
10333 */
10334DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10335{
10336 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10337
10338 /*
10339     * The first time thru, we allocate the recompiler state; the other times
10340 * we just need to reset it before using it again.
10341 */
10342 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10343 if (RT_LIKELY(pReNative))
10344 iemNativeReInit(pReNative, pTb);
10345 else
10346 {
10347 pReNative = iemNativeInit(pVCpu, pTb);
10348 AssertReturn(pReNative, pTb);
10349 }
10350
10351#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10352 /*
10353 * First do liveness analysis. This is done backwards.
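     * Walking the calls from last to first means each entry ends up describing
     * which guest registers the remaining (later) calls still consume; the
     * per-call emitters consult this via pReNative->idxCurCall further down.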
10354 */
10355 {
10356 uint32_t idxCall = pTb->Thrd.cCalls;
10357 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10358 { /* likely */ }
10359 else
10360 {
10361 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10362 while (idxCall > cAlloc)
10363 cAlloc *= 2;
10364 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10365 AssertReturn(pvNew, pTb);
10366 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10367 pReNative->cLivenessEntriesAlloc = cAlloc;
10368 }
10369 AssertReturn(idxCall > 0, pTb);
10370 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10371
10372 /* The initial (final) entry. */
10373 idxCall--;
10374 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10375
10376 /* Loop backwards thru the calls and fill in the other entries. */
10377 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10378 while (idxCall > 0)
10379 {
10380 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10381 if (pfnLiveness)
10382 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10383 else
10384 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
10385 pCallEntry--;
10386 idxCall--;
10387 }
10388
10389# ifdef VBOX_WITH_STATISTICS
10390     /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
10391        to 'clobbered' rather than 'input'. */
10392 /** @todo */
10393# endif
10394 }
10395#endif
10396
10397 /*
10398 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10399 * for aborting if an error happens.
10400 */
10401 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10402#ifdef LOG_ENABLED
10403 uint32_t const cCallsOrg = cCallsLeft;
10404#endif
10405 uint32_t off = 0;
10406 int rc = VINF_SUCCESS;
10407 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10408 {
10409 /*
10410 * Emit prolog code (fixed).
10411 */
10412 off = iemNativeEmitProlog(pReNative, off);
10413
10414 /*
10415 * Convert the calls to native code.
10416 */
10417#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10418 int32_t iGstInstr = -1;
10419#endif
10420#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10421 uint32_t cThreadedCalls = 0;
10422 uint32_t cRecompiledCalls = 0;
10423#endif
10424#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10425 uint32_t idxCurCall = 0;
10426#endif
10427 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10428 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10429 while (cCallsLeft-- > 0)
10430 {
10431 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10432#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10433 pReNative->idxCurCall = idxCurCall;
10434#endif
10435
10436 /*
10437 * Debug info, assembly markup and statistics.
10438 */
10439#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10440 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10441 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10442#endif
10443#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10444 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10445 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10446 {
10447 if (iGstInstr < (int32_t)pTb->cInstructions)
10448 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10449 else
10450 Assert(iGstInstr == pTb->cInstructions);
10451 iGstInstr = pCallEntry->idxInstr;
10452 }
10453 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10454#endif
10455#if defined(VBOX_STRICT)
10456 off = iemNativeEmitMarker(pReNative, off,
10457 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10458#endif
10459#if defined(VBOX_STRICT)
10460 iemNativeRegAssertSanity(pReNative);
10461#endif
10462#ifdef VBOX_WITH_STATISTICS
10463 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10464#endif
10465
10466 /*
10467 * Actual work.
10468 */
10469 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
10470 pfnRecom ? "(recompiled)" : "(todo)"));
10471 if (pfnRecom) /** @todo stats on this. */
10472 {
10473 off = pfnRecom(pReNative, off, pCallEntry);
10474 STAM_REL_STATS({cRecompiledCalls++;});
10475 }
10476 else
10477 {
10478 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10479 STAM_REL_STATS({cThreadedCalls++;});
10480 }
10481 Assert(off <= pReNative->cInstrBufAlloc);
10482 Assert(pReNative->cCondDepth == 0);
10483
10484#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10485 if (LogIs2Enabled())
10486 {
10487 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10488# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10489 static const char s_achState[] = "CUXI";
10490# else
10491 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
10492# endif
10493
10494 char szGpr[17];
10495 for (unsigned i = 0; i < 16; i++)
10496 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10497 szGpr[16] = '\0';
10498
10499 char szSegBase[X86_SREG_COUNT + 1];
10500 char szSegLimit[X86_SREG_COUNT + 1];
10501 char szSegAttrib[X86_SREG_COUNT + 1];
10502 char szSegSel[X86_SREG_COUNT + 1];
10503 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10504 {
10505 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10506 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10507 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10508 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10509 }
10510 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10511 = szSegSel[X86_SREG_COUNT] = '\0';
10512
10513 char szEFlags[8];
10514 for (unsigned i = 0; i < 7; i++)
10515 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10516 szEFlags[7] = '\0';
10517
10518 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10519 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10520 }
10521#endif
10522
10523 /*
10524 * Advance.
10525 */
10526 pCallEntry++;
10527#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10528 idxCurCall++;
10529#endif
10530 }
10531
10532 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10533 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10534 if (!cThreadedCalls)
10535 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10536
10537 /*
10538 * Emit the epilog code.
10539 */
10540 uint32_t idxReturnLabel;
10541 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10542
10543 /*
10544 * Generate special jump labels.
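          * These are only generated when something in the TB actually referenced
          * them, as tracked by pReNative->bmLabelTypes.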
10545 */
10546 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10547 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10548 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10549 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10550
10551 /*
10552          * Generate simple TB tail labels that just call a helper with a pVCpu
10553          * arg and either return or longjmp/throw a non-zero status.
10554 *
10555 * The array entries must be ordered by enmLabel value so we can index
10556 * using fTailLabels bit numbers.
10557 */
10558 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10559 static struct
10560 {
10561 IEMNATIVELABELTYPE enmLabel;
10562 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10563 } const g_aSimpleTailLabels[] =
10564 {
10565 { kIemNativeLabelType_Invalid, NULL },
10566 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10567 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10568 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10569 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10570 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10571 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10572 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10573 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10574 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10575 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10576 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10577 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10578 };
10579 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10580 AssertCompile(kIemNativeLabelType_Invalid == 0);
10581 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
10582 if (fTailLabels)
10583 {
10584 do
10585 {
10586 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10587 fTailLabels &= ~RT_BIT_64(enmLabel);
10588 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10589
10590 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10591 Assert(idxLabel != UINT32_MAX);
10592 if (idxLabel != UINT32_MAX)
10593 {
10594 iemNativeLabelDefine(pReNative, idxLabel, off);
10595
10596 /* int pfnCallback(PVMCPUCC pVCpu) */
10597 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10598 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10599
10600 /* jump back to the return sequence. */
10601 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10602 }
10603
10604 } while (fTailLabels);
10605 }
10606 }
10607 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10608 {
10609 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10610 return pTb;
10611 }
10612 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10613 Assert(off <= pReNative->cInstrBufAlloc);
10614
10615 /*
10616      * Make sure all labels have been defined.
10617 */
10618 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10619#ifdef VBOX_STRICT
10620 uint32_t const cLabels = pReNative->cLabels;
10621 for (uint32_t i = 0; i < cLabels; i++)
10622 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10623#endif
10624
10625 /*
10626 * Allocate executable memory, copy over the code we've generated.
10627 */
10628 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10629 if (pTbAllocator->pDelayedFreeHead)
10630 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10631
10632 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
10633 AssertReturn(paFinalInstrBuf, pTb);
10634 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10635
10636 /*
10637 * Apply fixups.
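      * Each fixup patches the relative-branch field of an already emitted
      * instruction so it points at the final offset of the label it references.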
10638 */
10639 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10640 uint32_t const cFixups = pReNative->cFixups;
10641 for (uint32_t i = 0; i < cFixups; i++)
10642 {
10643 Assert(paFixups[i].off < off);
10644 Assert(paFixups[i].idxLabel < cLabels);
10645 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10646 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10647 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10648 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10649 switch (paFixups[i].enmType)
10650 {
10651#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10652 case kIemNativeFixupType_Rel32:
10653 Assert(paFixups[i].off + 4 <= off);
10654 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10655 continue;
10656
10657#elif defined(RT_ARCH_ARM64)
10658 case kIemNativeFixupType_RelImm26At0:
10659 {
10660 Assert(paFixups[i].off < off);
10661 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10662 Assert(offDisp >= -262144 && offDisp < 262144);
10663 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10664 continue;
10665 }
10666
10667 case kIemNativeFixupType_RelImm19At5:
10668 {
10669 Assert(paFixups[i].off < off);
10670 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10671 Assert(offDisp >= -262144 && offDisp < 262144);
10672 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10673 continue;
10674 }
10675
10676 case kIemNativeFixupType_RelImm14At5:
10677 {
10678 Assert(paFixups[i].off < off);
10679 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10680 Assert(offDisp >= -8192 && offDisp < 8192);
10681 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10682 continue;
10683 }
10684
10685#endif
10686 case kIemNativeFixupType_Invalid:
10687 case kIemNativeFixupType_End:
10688 break;
10689 }
10690 AssertFailed();
10691 }
10692
10693 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
10694 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10695
10696 /*
10697 * Convert the translation block.
10698 */
10699 RTMemFree(pTb->Thrd.paCalls);
10700 pTb->Native.paInstructions = paFinalInstrBuf;
10701 pTb->Native.cInstructions = off;
10702 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10703#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10704    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10705 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10706#endif
10707
10708 Assert(pTbAllocator->cThreadedTbs > 0);
10709 pTbAllocator->cThreadedTbs -= 1;
10710 pTbAllocator->cNativeTbs += 1;
10711 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10712
10713#ifdef LOG_ENABLED
10714 /*
10715 * Disassemble to the log if enabled.
10716 */
10717 if (LogIs3Enabled())
10718 {
10719 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10720 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
10721# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10722 RTLogFlush(NULL);
10723# endif
10724 }
10725#endif
10726 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10727
10728 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10729 return pTb;
10730}
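/*
 * Hypothetical caller-side sketch (not taken from the actual TB dispatch code):
 * once a threaded TB has been picked for native recompilation, the call and the
 * follow-up check could look roughly like this:
 *
 *      pTb = iemNativeRecompile(pVCpu, pTb);
 *      if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
 *      {
 *          // Success: pTb->Native.paInstructions/cInstructions hold the new code.
 *      }
 *      else
 *      {
 *          // Failure: pTb is still the threaded version and remains usable as-is.
 *      }
 */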
10731