VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103852

Last change on this file since 103852 was 103847, checked in by vboxsync, 11 months ago

Move iemTbFlagsToString() to be accessible to both callers, bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 402.9 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103847 2024-03-14 11:29:54Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down the configs here to avoid wasting time on unused ones.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
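/* Illustrative example of the unit rounding done by iemExecMemAllocatorAllocInChunk
 * below (values for the default 128 byte unit):
 *      cbReq =   1  ->  cReqUnits = (  1 + 127) >> 7 = 1  ->  128 bytes reserved
 *      cbReq = 300  ->  cReqUnits = (300 + 127) >> 7 = 3  ->  384 bytes reserved */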
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
218/** Critical section protecting the GDB JIT descriptor list. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity, the bitmaps for all chunks are allocated as
339 * one continuous block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
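/* Rough usage sketch (simplified, for illustration): on darwin the block comes back
 * read+write only, so the expected calling pattern is along the lines of
 *      pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbCode);   // pages are RW
 *      ...emit the native code into pbCode...
 *      iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbCode);         // flip to RX + icache flush
 * On other hosts the chunk is allocated executable up front and ReadyForUse does
 * nothing. */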
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
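/* Worked example (illustration, using the 32 byte block header documented in
 * iemExecMemAllocatorGrow): cbReq = 100 becomes RT_ALIGN_32(100 + 32, 64) - 32 = 160,
 * so the 160 byte user area plus the next 32 byte block header ends exactly on a
 * 64 byte boundary, keeping the following allocation 64 byte aligned. The
 * alternative sub-allocator simply rounds 100 up to 128, one allocation unit. */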
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try pruning native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here, both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
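/* Encoding examples for the signed LEB128 range handled here (illustration only):
 *      iValue =    8  ->  0x08            (single positive byte)
 *      iValue =   -8  ->  0x78            (single byte, bit 6 marks it negative)
 *      iValue =  300  ->  0xac 0x02       (continuation bit set on the first byte)
 *      iValue = -300  ->  0xd4 0x7d
 * These match standard DWARF SLEB128 for values inside the +/-0x2000 limit. */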
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
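/* Encoding examples (illustration only): 0x7f -> 0x7f (one byte), 0x90 -> 0x90 0x01,
 * 0x4000 -> 0x80 0x80 0x01. */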
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
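/* Byte level example (illustration; assumes the usual System V DWARF numbering
 * where RBP is register 6):
 *      iemDwarfPutCfaDefCfa(Ptr, 6, 16)  emits  0x0c 0x06 0x10   (DW_CFA_def_cfa, reg, off)
 *      iemDwarfPutCfaOffset(Ptr, 6, 2)   emits  0x86 0x02        (DW_CFA_offset|6, ULEB off)
 * which is how the CIE initial instructions below are built. */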
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
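/* Illustration of how the string table fills up: after the empty name of the NULL
 * section and ".eh_frame" have been appended, szzStrTab holds "\0.eh_frame\0" and
 * the .eh_frame section header gets sh_name = 1, i.e. the offset recorded just
 * before the APPEND_STR() call. */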
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on a 64 byte boundary, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
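/* Layout sketch for the first run (illustration; assumes the blocks are laid out
 * contiguously after the anchor at the page aligned chunk start):
 *      0x000  heap anchor block (64 bytes)
 *      0x040  32 byte header + 64 byte align-tweak allocation, ends at 0x0a0
 *      0x0a0  32 byte header, next user area starts at 0x0c0 -> 64 byte aligned
 * With every later request sized so that data + next header is a multiple of 64,
 * the alignment is preserved for all subsequent allocations. */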
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
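/* Examples of the sizing logic above (illustration only):
 *      cbMax =  8M  ->  cbChunk =  4M,              cMaxChunks =  2
 *      cbMax = 64M  ->  cbChunk = 16M (cbMax / 4),  cMaxChunks =  4
 *      cbMax =  4G  ->  cbChunk = 64M,              cMaxChunks = 64 */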
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
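    /*
     * Rough illustration of the bitmap sizing above for the alternative
     * sub-allocator (assuming a 256 byte allocation unit, i.e.
     * IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 8): a 64 MiB chunk has
     * 64M / 256 = 256K units, so the per-chunk bitmap is 256K / 8 = 32 KiB,
     * i.e. 4096 uint64_t elements (cbChunk >> (8 + 6)).
     */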
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while (cbInitial > (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
1564
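/*
 * Illustrative call only - the figures below are made up, the real caller
 * derives cbMax/cbInitial from the VM configuration and passes 0 for cbChunk
 * to get a cbMax dependent default chunk size:
 *
 *     int rc = iemExecMemAllocatorInit(pVCpu, 64 * _1M, 16 * _1M, 0);
 *     AssertLogRelRCReturn(rc, rc);
 */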
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#GP(0).
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#NM.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseDeviceNotAvailableJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise a \#UD.
1607 */
1608IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1609{
1610 iemRaiseUndefinedOpcodeJmp(pVCpu);
1611#ifndef _MSC_VER
1612 return VINF_IEM_RAISED_XCPT; /* not reached */
1613#endif
1614}
1615
1616
1617/**
1618 * Used by TB code when it wants to raise a \#MF.
1619 */
1620IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1621{
1622 iemRaiseMathFaultJmp(pVCpu);
1623#ifndef _MSC_VER
1624 return VINF_IEM_RAISED_XCPT; /* not reached */
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code when it wants to raise a \#XF.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1633{
1634 iemRaiseSimdFpExceptionJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#DE.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDivideErrorJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when detecting opcode changes.
1655 * @see iemThreadeFuncWorkerObsoleteTb
1656 */
1657IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1658{
1659 /* We set fSafeToFree to false because we're being called in the context
1660 of a TB callback function, which for native TBs means we cannot release
1661 the executable memory until we've returned all the way back to iemTbExec,
1662 as that return path goes via the native code generated for the TB. */
1663 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1664 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1665 return VINF_IEM_REEXEC_BREAK;
1666}
1667
1668
1669/**
1670 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1671 */
1672IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1673{
1674 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1675 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1676 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1677 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1678 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1679 return VINF_IEM_REEXEC_BREAK;
1680}
1681
1682
1683/**
1684 * Used by TB code when we missed a PC check after a branch.
1685 */
1686IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1687{
1688 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1689 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1690 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1691 pVCpu->iem.s.pbInstrBuf));
1692 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1693 return VINF_IEM_REEXEC_BREAK;
1694}
1695
1696
1697
1698/*********************************************************************************************************************************
1699* Helpers: Segmented memory fetches and stores. *
1700*********************************************************************************************************************************/
1701
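/*
 * A note on the pattern used by all the helpers below: when the native TB code
 * does the TLB lookup inline (IEMNATIVE_WITH_TLB_LOOKUP_FETCH and its _STORE,
 * _PUSH, _POP and _MAPPED siblings), these helpers are presumably only reached
 * on the slow path and therefore call the 'SafeJmp' workers directly; otherwise
 * the regular 'Jmp' workers are used and do the whole job.  The data fetchers
 * return the value zero or sign extended to 64 bits so the emitted code does
 * not have to deal with partial register writes.
 */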
1702/**
1703 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1704 */
1705IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1706{
1707#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1708 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1709#else
1710 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1711#endif
1712}
1713
1714
1715/**
1716 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1717 * to 16 bits.
1718 */
1719IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1720{
1721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1722 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1723#else
1724 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1725#endif
1726}
1727
1728
1729/**
1730 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1731 * to 32 bits.
1732 */
1733IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1734{
1735#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1736 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1737#else
1738 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1739#endif
1740}
1741
1742/**
1743 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1744 * to 64 bits.
1745 */
1746IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1747{
1748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1749 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1750#else
1751 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1752#endif
1753}
1754
1755
1756/**
1757 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1758 */
1759IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1760{
1761#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1762 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1763#else
1764 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1765#endif
1766}
1767
1768
1769/**
1770 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1771 * to 32 bits.
1772 */
1773IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1774{
1775#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1776 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1777#else
1778 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1779#endif
1780}
1781
1782
1783/**
1784 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1785 * to 64 bits.
1786 */
1787IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1788{
1789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1790 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1791#else
1792 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1793#endif
1794}
1795
1796
1797/**
1798 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1799 */
1800IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1801{
1802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1803 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1804#else
1805 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1806#endif
1807}
1808
1809
1810/**
1811 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1812 * to 64 bits.
1813 */
1814IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1815{
1816#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1817 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1818#else
1819 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1820#endif
1821}
1822
1823
1824/**
1825 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1826 */
1827IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1828{
1829#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1830 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1831#else
1832 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1833#endif
1834}
1835
1836
1837/**
1838 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1839 */
1840IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1841{
1842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1843 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1844#else
1845 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1846#endif
1847}
1848
1849
1850/**
1851 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1852 */
1853IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1854{
1855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1856 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1857#else
1858 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1859#endif
1860}
1861
1862
1863/**
1864 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1865 */
1866IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1867{
1868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1869 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1870#else
1871 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1872#endif
1873}
1874
1875
1876/**
1877 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1878 */
1879IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1880{
1881#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1882 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1883#else
1884 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1885#endif
1886}
1887
1888
1889
1890/**
1891 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1892 */
1893IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1894{
1895#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1896 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1897#else
1898 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1899#endif
1900}
1901
1902
1903/**
1904 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1905 */
1906IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1907{
1908#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1909 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1910#else
1911 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1912#endif
1913}
1914
1915
1916/**
1917 * Used by TB code to store a 32-bit selector value onto a generic stack.
1918 *
1919 * Intel CPUs don't write a whole dword, hence the special function.
1920 */
1921IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1922{
1923#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1924 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1925#else
1926 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1927#endif
1928}
1929
1930
1931/**
1932 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1933 */
1934IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1935{
1936#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1937 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1938#else
1939 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1940#endif
1941}
1942
1943
1944/**
1945 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1946 */
1947IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1948{
1949#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1950 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1951#else
1952 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1953#endif
1954}
1955
1956
1957/**
1958 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1963 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1964#else
1965 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1976 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1977#else
1978 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1979#endif
1980}
1981
1982
1983
1984/*********************************************************************************************************************************
1985* Helpers: Flat memory fetches and stores. *
1986*********************************************************************************************************************************/
1987
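/*
 * The flat variants below mirror the segmented helpers above; the only
 * difference is that no segment register is involved, so the 'SafeJmp' workers
 * are called with UINT8_MAX as the segment register index and the non-TLB path
 * uses the dedicated 'Flat' workers.
 */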
1988/**
1989 * Used by TB code to load unsigned 8-bit data w/ flat address.
1990 * @note Zero extending the value to 64-bit to simplify assembly.
1991 */
1992IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1993{
1994#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1995 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1996#else
1997 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1998#endif
1999}
2000
2001
2002/**
2003 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2004 * to 16 bits.
2005 * @note Zero extending the value to 64-bit to simplify assembly.
2006 */
2007IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2008{
2009#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2010 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2011#else
2012 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2013#endif
2014}
2015
2016
2017/**
2018 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2019 * to 32 bits.
2020 * @note Zero extending the value to 64-bit to simplify assembly.
2021 */
2022IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2023{
2024#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2025 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2026#else
2027 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2028#endif
2029}
2030
2031
2032/**
2033 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2034 * to 64 bits.
2035 */
2036IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2037{
2038#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2039 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2040#else
2041 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2042#endif
2043}
2044
2045
2046/**
2047 * Used by TB code to load unsigned 16-bit data w/ flat address.
2048 * @note Zero extending the value to 64-bit to simplify assembly.
2049 */
2050IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2051{
2052#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2053 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2054#else
2055 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2056#endif
2057}
2058
2059
2060/**
2061 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2062 * to 32 bits.
2063 * @note Zero extending the value to 64-bit to simplify assembly.
2064 */
2065IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2066{
2067#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2068 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2069#else
2070 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2071#endif
2072}
2073
2074
2075/**
2076 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2077 * to 64 bits.
2078 * @note Zero extending the value to 64-bit to simplify assembly.
2079 */
2080IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2081{
2082#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2083 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2084#else
2085 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2086#endif
2087}
2088
2089
2090/**
2091 * Used by TB code to load unsigned 32-bit data w/ flat address.
2092 * @note Zero extending the value to 64-bit to simplify assembly.
2093 */
2094IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2095{
2096#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2097 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2098#else
2099 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2100#endif
2101}
2102
2103
2104/**
2105 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2106 * to 64 bits.
2107 * @note Zero extending the value to 64-bit to simplify assembly.
2108 */
2109IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2110{
2111#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2112 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2113#else
2114 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2115#endif
2116}
2117
2118
2119/**
2120 * Used by TB code to load unsigned 64-bit data w/ flat address.
2121 */
2122IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2123{
2124#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2125 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2126#else
2127 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2128#endif
2129}
2130
2131
2132/**
2133 * Used by TB code to store unsigned 8-bit data w/ flat address.
2134 */
2135IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2136{
2137#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2138 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2139#else
2140 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2141#endif
2142}
2143
2144
2145/**
2146 * Used by TB code to store unsigned 16-bit data w/ flat address.
2147 */
2148IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2149{
2150#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2151 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2152#else
2153 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2154#endif
2155}
2156
2157
2158/**
2159 * Used by TB code to store unsigned 32-bit data w/ flat address.
2160 */
2161IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2162{
2163#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2164 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2165#else
2166 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2167#endif
2168}
2169
2170
2171/**
2172 * Used by TB code to store unsigned 64-bit data w/ flat address.
2173 */
2174IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2175{
2176#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2177 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2178#else
2179 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2180#endif
2181}
2182
2183
2184
2185/**
2186 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2187 */
2188IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2189{
2190#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2191 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2192#else
2193 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2194#endif
2195}
2196
2197
2198/**
2199 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2202{
2203#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2204 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2205#else
2206 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2207#endif
2208}
2209
2210
2211/**
2212 * Used by TB code to store a segment selector value onto a flat stack.
2213 *
2214 * Intel CPUs don't write a whole dword, hence the special function.
2215 */
2216IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2217{
2218#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2219 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2220#else
2221 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2222#endif
2223}
2224
2225
2226/**
2227 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2228 */
2229IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2230{
2231#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2232 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2233#else
2234 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2235#endif
2236}
2237
2238
2239/**
2240 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2241 */
2242IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2243{
2244#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2245 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2246#else
2247 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2248#endif
2249}
2250
2251
2252/**
2253 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2254 */
2255IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2256{
2257#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2258 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2259#else
2260 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2261#endif
2262}
2263
2264
2265/**
2266 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2267 */
2268IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2269{
2270#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2271 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2272#else
2273 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2274#endif
2275}
2276
2277
2278
2279/*********************************************************************************************************************************
2280* Helpers: Segmented memory mapping. *
2281*********************************************************************************************************************************/
2282
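/*
 * Each mapping helper hands back a host pointer to the guest data and fills in
 * *pbUnmapInfo, which must later be passed to the matching commit-and-unmap
 * helper further down.  Written as plain C, the sequence the emitted code
 * performs is roughly (illustrative only):
 *
 *     uint8_t   bUnmapInfo;
 *     uint16_t *pu16 = iemNativeHlpMemMapDataU16Rw(pVCpu, &bUnmapInfo, GCPtrMem, X86_SREG_DS);
 *     *pu16 += 1;
 *     iemNativeHlpMemCommitAndUnmapRw(pVCpu, bUnmapInfo);
 */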
2283/**
2284 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2285 * segmentation.
2286 */
2287IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2288 RTGCPTR GCPtrMem, uint8_t iSegReg))
2289{
2290#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2291 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#else
2293 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2294#endif
2295}
2296
2297
2298/**
2299 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2300 */
2301IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2302 RTGCPTR GCPtrMem, uint8_t iSegReg))
2303{
2304#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2305 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#else
2307 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2308#endif
2309}
2310
2311
2312/**
2313 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2314 */
2315IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2316 RTGCPTR GCPtrMem, uint8_t iSegReg))
2317{
2318#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2319 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2320#else
2321 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2322#endif
2323}
2324
2325
2326/**
2327 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2328 */
2329IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2330 RTGCPTR GCPtrMem, uint8_t iSegReg))
2331{
2332#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2333 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2334#else
2335 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2336#endif
2337}
2338
2339
2340/**
2341 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2342 * segmentation.
2343 */
2344IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2345 RTGCPTR GCPtrMem, uint8_t iSegReg))
2346{
2347#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2348 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#else
2350 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2351#endif
2352}
2353
2354
2355/**
2356 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2357 */
2358IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2359 RTGCPTR GCPtrMem, uint8_t iSegReg))
2360{
2361#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2362 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#else
2364 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2365#endif
2366}
2367
2368
2369/**
2370 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2371 */
2372IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2373 RTGCPTR GCPtrMem, uint8_t iSegReg))
2374{
2375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2376 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2377#else
2378 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2379#endif
2380}
2381
2382
2383/**
2384 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2385 */
2386IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2387 RTGCPTR GCPtrMem, uint8_t iSegReg))
2388{
2389#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2390 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2391#else
2392 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2393#endif
2394}
2395
2396
2397/**
2398 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2399 * segmentation.
2400 */
2401IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2402 RTGCPTR GCPtrMem, uint8_t iSegReg))
2403{
2404#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2405 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#else
2407 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2408#endif
2409}
2410
2411
2412/**
2413 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2414 */
2415IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2416 RTGCPTR GCPtrMem, uint8_t iSegReg))
2417{
2418#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2419 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#else
2421 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2422#endif
2423}
2424
2425
2426/**
2427 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2428 */
2429IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2430 RTGCPTR GCPtrMem, uint8_t iSegReg))
2431{
2432#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2433 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2434#else
2435 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2436#endif
2437}
2438
2439
2440/**
2441 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2442 */
2443IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2444 RTGCPTR GCPtrMem, uint8_t iSegReg))
2445{
2446#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2447 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2448#else
2449 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2450#endif
2451}
2452
2453
2454/**
2455 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2456 * segmentation.
2457 */
2458IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2459 RTGCPTR GCPtrMem, uint8_t iSegReg))
2460{
2461#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2462 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#else
2464 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2465#endif
2466}
2467
2468
2469/**
2470 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2471 */
2472IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2473 RTGCPTR GCPtrMem, uint8_t iSegReg))
2474{
2475#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2476 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#else
2478 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2479#endif
2480}
2481
2482
2483/**
2484 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2485 */
2486IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2487 RTGCPTR GCPtrMem, uint8_t iSegReg))
2488{
2489#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2490 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#else
2492 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2493#endif
2494}
2495
2496
2497/**
2498 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2499 */
2500IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2501 RTGCPTR GCPtrMem, uint8_t iSegReg))
2502{
2503#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2504 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#else
2506 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2507#endif
2508}
2509
2510
2511/**
2512 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2513 */
2514IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2515 RTGCPTR GCPtrMem, uint8_t iSegReg))
2516{
2517#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2518 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2519#else
2520 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2521#endif
2522}
2523
2524
2525/**
2526 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2527 */
2528IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2529 RTGCPTR GCPtrMem, uint8_t iSegReg))
2530{
2531#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2532 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2533#else
2534 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2535#endif
2536}
2537
2538
2539/**
2540 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2541 * segmentation.
2542 */
2543IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2544 RTGCPTR GCPtrMem, uint8_t iSegReg))
2545{
2546#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2547 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#else
2549 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2550#endif
2551}
2552
2553
2554/**
2555 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2556 */
2557IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2558 RTGCPTR GCPtrMem, uint8_t iSegReg))
2559{
2560#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2561 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#else
2563 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2564#endif
2565}
2566
2567
2568/**
2569 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2570 */
2571IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2572 RTGCPTR GCPtrMem, uint8_t iSegReg))
2573{
2574#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2575 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2576#else
2577 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2578#endif
2579}
2580
2581
2582/**
2583 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2584 */
2585IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2586 RTGCPTR GCPtrMem, uint8_t iSegReg))
2587{
2588#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2589 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2590#else
2591 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2592#endif
2593}
2594
2595
2596/*********************************************************************************************************************************
2597* Helpers: Flat memory mapping. *
2598*********************************************************************************************************************************/
2599
2600/**
2601 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2602 * address.
2603 */
2604IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2605{
2606#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2607 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2608#else
2609 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2610#endif
2611}
2612
2613
2614/**
2615 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2616 */
2617IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2618{
2619#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2620 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2621#else
2622 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2623#endif
2624}
2625
2626
2627/**
2628 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2629 */
2630IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2631{
2632#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2633 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2634#else
2635 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2636#endif
2637}
2638
2639
2640/**
2641 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2642 */
2643IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2644{
2645#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2646 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2647#else
2648 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2649#endif
2650}
2651
2652
2653/**
2654 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2655 * address.
2656 */
2657IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2658{
2659#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2660 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2661#else
2662 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2663#endif
2664}
2665
2666
2667/**
2668 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2669 */
2670IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2671{
2672#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2673 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2674#else
2675 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2676#endif
2677}
2678
2679
2680/**
2681 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2682 */
2683IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2684{
2685#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2686 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2687#else
2688 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2689#endif
2690}
2691
2692
2693/**
2694 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2695 */
2696IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2697{
2698#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2699 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2700#else
2701 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2702#endif
2703}
2704
2705
2706/**
2707 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2708 * address.
2709 */
2710IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2711{
2712#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2713 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2714#else
2715 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2716#endif
2717}
2718
2719
2720/**
2721 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2722 */
2723IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2724{
2725#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2726 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2727#else
2728 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2729#endif
2730}
2731
2732
2733/**
2734 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2735 */
2736IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2737{
2738#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2739 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2740#else
2741 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2742#endif
2743}
2744
2745
2746/**
2747 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2748 */
2749IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2750{
2751#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2752 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2753#else
2754 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2755#endif
2756}
2757
2758
2759/**
2760 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2761 * address.
2762 */
2763IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2764{
2765#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2766 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2767#else
2768 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2769#endif
2770}
2771
2772
2773/**
2774 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2775 */
2776IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2777{
2778#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2779 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2780#else
2781 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2782#endif
2783}
2784
2785
2786/**
2787 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2788 */
2789IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2790{
2791#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2792 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2793#else
2794 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2795#endif
2796}
2797
2798
2799/**
2800 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2801 */
2802IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2803{
2804#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2805 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2806#else
2807 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2808#endif
2809}
2810
2811
2812/**
2813 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2814 */
2815IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2816{
2817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2818 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2819#else
2820 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2821#endif
2822}
2823
2824
2825/**
2826 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2827 */
2828IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2829{
2830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2831 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2832#else
2833 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2834#endif
2835}
2836
2837
2838/**
2839 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2840 * address.
2841 */
2842IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2843{
2844#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2845 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2846#else
2847 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2848#endif
2849}
2850
2851
2852/**
2853 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2854 */
2855IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2856{
2857#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2858 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2859#else
2860 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2861#endif
2862}
2863
2864
2865/**
2866 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2867 */
2868IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2869{
2870#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2871 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2872#else
2873 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2874#endif
2875}
2876
2877
2878/**
2879 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2880 */
2881IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2882{
2883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2884 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2885#else
2886 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2887#endif
2888}
2889
2890
2891/*********************************************************************************************************************************
2892* Helpers: Commit, rollback & unmap *
2893*********************************************************************************************************************************/
2894
2895/**
2896 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2897 */
2898IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2899{
2900 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2901}
2902
2903
2904/**
2905 * Used by TB code to commit and unmap a read-write memory mapping.
2906 */
2907IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2908{
2909 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2910}
2911
2912
2913/**
2914 * Used by TB code to commit and unmap a write-only memory mapping.
2915 */
2916IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2917{
2918 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2919}
2920
2921
2922/**
2923 * Used by TB code to commit and unmap a read-only memory mapping.
2924 */
2925IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2926{
2927 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2928}
2929
2930
2931/**
2932 * Reinitializes the native recompiler state.
2933 *
2934 * Called before starting a new recompile job.
2935 */
2936static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2937{
2938 pReNative->cLabels = 0;
2939 pReNative->bmLabelTypes = 0;
2940 pReNative->cFixups = 0;
2941#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2942 pReNative->pDbgInfo->cEntries = 0;
2943#endif
2944 pReNative->pTbOrg = pTb;
2945 pReNative->cCondDepth = 0;
2946 pReNative->uCondSeqNo = 0;
2947 pReNative->uCheckIrqSeqNo = 0;
2948 pReNative->uTlbSeqNo = 0;
2949
2950#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2951 pReNative->Core.offPc = 0;
2952 pReNative->Core.cInstrPcUpdateSkipped = 0;
2953#endif
2954#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2955 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2956#endif
2957 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2958#if IEMNATIVE_HST_GREG_COUNT < 32
2959 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2960#endif
2961 ;
2962 pReNative->Core.bmHstRegsWithGstShadow = 0;
2963 pReNative->Core.bmGstRegShadows = 0;
2964 pReNative->Core.bmVars = 0;
2965 pReNative->Core.bmStack = 0;
2966 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2967 pReNative->Core.u64ArgVars = UINT64_MAX;
2968
2969 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 14);
2970 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2971 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2972 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2973 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2974 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2975 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2976 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2977 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2978 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2979 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2980 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2981 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2982 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2983 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2984
2985 /* Full host register reinit: */
2986 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2987 {
2988 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2989 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2990 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2991 }
2992
2993 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2994 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2995#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2996 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2997#endif
2998#ifdef IEMNATIVE_REG_FIXED_TMP0
2999 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3000#endif
3001#ifdef IEMNATIVE_REG_FIXED_TMP1
3002 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3003#endif
3004#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3005 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3006#endif
3007 );
3008 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3009 {
3010 fRegs &= ~RT_BIT_32(idxReg);
3011 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3012 }
3013
3014 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3015#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3016 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3017#endif
3018#ifdef IEMNATIVE_REG_FIXED_TMP0
3019 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3020#endif
3021#ifdef IEMNATIVE_REG_FIXED_TMP1
3022 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3023#endif
3024#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3025 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3026#endif
3027
3028#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3029# ifdef RT_ARCH_ARM64
3030 /*
3031 * Arm64 only has 32 128-bit registers, so in order to support emulating 256-bit registers we statically
3032 * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3033 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register as fixed here during init
3034 * and the register allocator assumes it will always be free when the lower one is picked.
3035 */
3036 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
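    /* 0xaaaaaaaa sets every odd bit, i.e. marks v1, v3, v5, ... as additionally fixed. */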
3037# else
3038 uint32_t const fFixedAdditional = 0;
3039# endif
3040
3041 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3042 | fFixedAdditional
3043# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3044 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3045# endif
3046 ;
3047 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3048 pReNative->Core.bmGstSimdRegShadows = 0;
3049 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3050 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3051
3052 /* Full host register reinit: */
3053 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3054 {
3055 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3056 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3057 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3058 }
3059
3060 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3061 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3062 {
3063 fRegs &= ~RT_BIT_32(idxReg);
3064 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3065 }
3066
3067#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3068 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3069#endif
3070
3071#endif
3072
3073 return pReNative;
3074}
3075
3076
3077/**
3078 * Allocates and initializes the native recompiler state.
3079 *
3080 * This is called the first time an EMT wants to recompile something.
3081 *
3082 * @returns Pointer to the new recompiler state.
3083 * @param pVCpu The cross context virtual CPU structure of the calling
3084 * thread.
3085 * @param pTb The TB that's about to be recompiled.
3086 * @thread EMT(pVCpu)
3087 */
3088static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3089{
3090 VMCPU_ASSERT_EMT(pVCpu);
3091
3092 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3093 AssertReturn(pReNative, NULL);
3094
3095 /*
3096 * Try allocate all the buffers and stuff we need.
3097 */
3098 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3099 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3100 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3101#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3102 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3103#endif
3104 if (RT_LIKELY( pReNative->pInstrBuf
3105 && pReNative->paLabels
3106 && pReNative->paFixups)
3107#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3108 && pReNative->pDbgInfo
3109#endif
3110 )
3111 {
3112 /*
3113 * Set the buffer & array sizes on success.
3114 */
3115 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3116 pReNative->cLabelsAlloc = _8K;
3117 pReNative->cFixupsAlloc = _16K;
3118#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3119 pReNative->cDbgInfoAlloc = _16K;
3120#endif
3121
3122 /* Other constant stuff: */
3123 pReNative->pVCpu = pVCpu;
3124
3125 /*
3126 * Done, just need to save it and reinit it.
3127 */
3128 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3129 return iemNativeReInit(pReNative, pTb);
3130 }
3131
3132 /*
3133 * Failed. Cleanup and return.
3134 */
3135 AssertFailed();
3136 RTMemFree(pReNative->pInstrBuf);
3137 RTMemFree(pReNative->paLabels);
3138 RTMemFree(pReNative->paFixups);
3139#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3140 RTMemFree(pReNative->pDbgInfo);
3141#endif
3142 RTMemFree(pReNative);
3143 return NULL;
3144}
3145
3146
3147/**
3148 * Creates a label.
3149 *
3150 * If the label does not yet have a defined position,
3151 * call iemNativeLabelDefine() later to set it.
3152 *
3153 * @returns Label ID. Throws VBox status code on failure, so no need to check
3154 * the return value.
3155 * @param pReNative The native recompile state.
3156 * @param enmType The label type.
3157 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3158 * label is not yet defined (default).
3159 * @param   uData       Data associated with the label.  Only applicable to
3160 *                      certain types of labels.  Default is zero.
3161 */
3162DECL_HIDDEN_THROW(uint32_t)
3163iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3164 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3165{
3166 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3167
3168 /*
3169 * Locate existing label definition.
3170 *
3171 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3172 * and uData is zero.
3173 */
3174 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3175 uint32_t const cLabels = pReNative->cLabels;
3176 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3177#ifndef VBOX_STRICT
3178 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3179 && offWhere == UINT32_MAX
3180 && uData == 0
3181#endif
3182 )
3183 {
3184#ifndef VBOX_STRICT
3185 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3186 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3187 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3188 if (idxLabel < pReNative->cLabels)
3189 return idxLabel;
3190#else
3191 for (uint32_t i = 0; i < cLabels; i++)
3192 if ( paLabels[i].enmType == enmType
3193 && paLabels[i].uData == uData)
3194 {
3195 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3196 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3197 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3198 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3199 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3200 return i;
3201 }
3202 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3203 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3204#endif
3205 }
3206
3207 /*
3208 * Make sure we've got room for another label.
3209 */
3210 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3211 { /* likely */ }
3212 else
3213 {
3214 uint32_t cNew = pReNative->cLabelsAlloc;
3215 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3216 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3217 cNew *= 2;
3218 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
3219 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3220 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3221 pReNative->paLabels = paLabels;
3222 pReNative->cLabelsAlloc = cNew;
3223 }
3224
3225 /*
3226 * Define a new label.
3227 */
3228 paLabels[cLabels].off = offWhere;
3229 paLabels[cLabels].enmType = enmType;
3230 paLabels[cLabels].uData = uData;
3231 pReNative->cLabels = cLabels + 1;
3232
3233 Assert((unsigned)enmType < 64);
3234 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3235
3236 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3237 {
3238 Assert(uData == 0);
3239 pReNative->aidxUniqueLabels[enmType] = cLabels;
3240 }
3241
3242 if (offWhere != UINT32_MAX)
3243 {
3244#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3245 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3246 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3247#endif
3248 }
3249 return cLabels;
3250}
3251
3252
3253/**
3254 * Defines the location of an existing label.
3255 *
3256 * @param pReNative The native recompile state.
3257 * @param idxLabel The label to define.
3258 * @param offWhere The position.
3259 */
3260DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3261{
3262 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3263 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3264 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3265 pLabel->off = offWhere;
3266#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3267 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3268 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3269#endif
3270}
3271
3272
3273/**
3274 * Looks up a label.
3275 *
3276 * @returns Label ID if found, UINT32_MAX if not.
3277 */
3278static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3279 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3280{
3281 Assert((unsigned)enmType < 64);
3282 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3283 {
3284 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3285 return pReNative->aidxUniqueLabels[enmType];
3286
3287 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3288 uint32_t const cLabels = pReNative->cLabels;
3289 for (uint32_t i = 0; i < cLabels; i++)
3290 if ( paLabels[i].enmType == enmType
3291 && paLabels[i].uData == uData
3292 && ( paLabels[i].off == offWhere
3293 || offWhere == UINT32_MAX
3294 || paLabels[i].off == UINT32_MAX))
3295 return i;
3296 }
3297 return UINT32_MAX;
3298}
3299
3300
3301/**
3302 * Adds a fixup.
3303 *
3304 * @throws VBox status code (int) on failure.
3305 * @param pReNative The native recompile state.
3306 * @param offWhere The instruction offset of the fixup location.
3307 * @param idxLabel The target label ID for the fixup.
3308 * @param enmType The fixup type.
3309 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3310 */
3311DECL_HIDDEN_THROW(void)
3312iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3313 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3314{
3315 Assert(idxLabel <= UINT16_MAX);
3316 Assert((unsigned)enmType <= UINT8_MAX);
3317
3318 /*
3319     * Make sure we've got room for another fixup.
3320 */
3321 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3322 uint32_t const cFixups = pReNative->cFixups;
3323 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3324 { /* likely */ }
3325 else
3326 {
3327 uint32_t cNew = pReNative->cFixupsAlloc;
3328 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3329 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3330 cNew *= 2;
3331 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3332 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3333 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3334 pReNative->paFixups = paFixups;
3335 pReNative->cFixupsAlloc = cNew;
3336 }
3337
3338 /*
3339 * Add the fixup.
3340 */
3341 paFixups[cFixups].off = offWhere;
3342 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3343 paFixups[cFixups].enmType = enmType;
3344 paFixups[cFixups].offAddend = offAddend;
3345 pReNative->cFixups = cFixups + 1;
3346}
3347
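/*
 * Illustrative sketch (not part of the original file): how labels and fixups are meant
 * to be used together.  A label is created up front without a position, each branch to
 * it records a fixup at the emission site, and the label is pinned with
 * iemNativeLabelDefine() once the target offset is known.  The label and fixup type
 * values are passed in here because the concrete enum values live elsewhere in the
 * recompiler headers; treat the overall shape, not the details, as the point.
 */
#if 0 /* example only */
static uint32_t iemNativeExampleEmitForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                  IEMNATIVELABELTYPE enmLabelType, IEMNATIVEFIXUPTYPE enmFixupType)
{
    /* Create (or look up) the label; offWhere defaults to UINT32_MAX, i.e. not yet defined. */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);

    /* ... emit the branch instruction at 'off' here, then record that it needs patching ... */
    iemNativeAddFixup(pReNative, off, idxLabel, enmFixupType);

    /* ... emit the code sitting between the branch and its target ... */

    /* Pin the label to the current offset; the final pass resolves the fixup against it. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif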
3348
3349/**
3350 * Slow code path for iemNativeInstrBufEnsure.
3351 */
3352DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3353{
3354 /* Double the buffer size till we meet the request. */
3355 uint32_t cNew = pReNative->cInstrBufAlloc;
3356 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3357 do
3358 cNew *= 2;
3359 while (cNew < off + cInstrReq);
3360
3361 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3362#ifdef RT_ARCH_ARM64
3363 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3364#else
3365 uint32_t const cbMaxInstrBuf = _2M;
3366#endif
3367 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3368
3369 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3370 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3371
3372#ifdef VBOX_STRICT
3373 pReNative->offInstrBufChecked = off + cInstrReq;
3374#endif
3375 pReNative->cInstrBufAlloc = cNew;
3376 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3377}
3378
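/*
 * Illustrative sketch (not part of the original file): the intended calling pattern around
 * the slow path above.  This assumes the inline fast path iemNativeInstrBufEnsure(),
 * declared in the recompiler header, returns the (possibly reallocated) instruction buffer
 * once room for cInstrReq more instruction units is guaranteed, and that IEMNATIVEINSTR is
 * a byte on AMD64 and a 32-bit word on ARM64 as the emitters elsewhere suggest.
 */
#if 0 /* example only */
static uint32_t iemNativeExampleEmitNop(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Make sure there is room for one more instruction unit before writing it. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
# ifdef RT_ARCH_AMD64
    pCodeBuf[off++] = 0x90;                     /* nop */
# elif defined(RT_ARCH_ARM64)
    pCodeBuf[off++] = UINT32_C(0xd503201f);     /* nop */
# endif
    return off;
}
#endif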
3379#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3380
3381/**
3382 * Grows the static debug info array used during recompilation.
3383 *
3384 * @returns Pointer to the new debug info block; throws VBox status code on
3385 * failure, so no need to check the return value.
3386 */
3387DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3388{
3389 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3390 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3391 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3392 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3393 pReNative->pDbgInfo = pDbgInfo;
3394 pReNative->cDbgInfoAlloc = cNew;
3395 return pDbgInfo;
3396}
3397
3398
3399/**
3400 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3401 */
3402DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3403{
3404 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3405 { /* likely */ }
3406 else
3407 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3408 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3409}
3410
3411
3412/**
3413 * Debug Info: Adds a native offset record, if necessary.
3414 */
3415DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3416{
3417 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3418
3419 /*
3420 * Search backwards to see if we've got a similar record already.
3421 */
3422 uint32_t idx = pDbgInfo->cEntries;
3423 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3424 while (idx-- > idxStop)
3425 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3426 {
3427 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3428 return;
3429 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3430 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3431 break;
3432 }
3433
3434 /*
3435 * Add it.
3436 */
3437 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3438 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3439 pEntry->NativeOffset.offNative = off;
3440}
3441
3442
3443/**
3444 * Debug Info: Record info about a label.
3445 */
3446static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3447{
3448 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3449 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3450 pEntry->Label.uUnused = 0;
3451 pEntry->Label.enmLabel = (uint8_t)enmType;
3452 pEntry->Label.uData = uData;
3453}
3454
3455
3456/**
3457 * Debug Info: Record info about a threaded call.
3458 */
3459static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3460{
3461 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3462 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3463 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3464 pEntry->ThreadedCall.uUnused = 0;
3465 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3466}
3467
3468
3469/**
3470 * Debug Info: Record info about a new guest instruction.
3471 */
3472static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3473{
3474 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3475 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3476 pEntry->GuestInstruction.uUnused = 0;
3477 pEntry->GuestInstruction.fExec = fExec;
3478}
3479
3480
3481/**
3482 * Debug Info: Record info about guest register shadowing.
3483 */
3484DECL_HIDDEN_THROW(void)
3485iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3486 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3487{
3488 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3489 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3490 pEntry->GuestRegShadowing.uUnused = 0;
3491 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3492 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3493 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3494}
3495
3496
3497# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3498/**
3499 * Debug Info: Record info about guest SIMD register shadowing.
3500 */
3501DECL_HIDDEN_THROW(void)
3502iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3503 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3504{
3505 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3506 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3507 pEntry->GuestSimdRegShadowing.uUnused = 0;
3508 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3509 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3510 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3511}
3512# endif
3513
3514
3515# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3516/**
3517 * Debug Info: Record info about delayed RIP updates.
3518 */
3519DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3520{
3521 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3522 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3523 pEntry->DelayedPcUpdate.offPc = offPc;
3524 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3525}
3526# endif
3527
3528#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3529
3530
3531/*********************************************************************************************************************************
3532* Register Allocator *
3533*********************************************************************************************************************************/
3534
3535/**
3536 * Register parameter indexes (indexed by argument number).
3537 */
3538DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3539{
3540 IEMNATIVE_CALL_ARG0_GREG,
3541 IEMNATIVE_CALL_ARG1_GREG,
3542 IEMNATIVE_CALL_ARG2_GREG,
3543 IEMNATIVE_CALL_ARG3_GREG,
3544#if defined(IEMNATIVE_CALL_ARG4_GREG)
3545 IEMNATIVE_CALL_ARG4_GREG,
3546# if defined(IEMNATIVE_CALL_ARG5_GREG)
3547 IEMNATIVE_CALL_ARG5_GREG,
3548# if defined(IEMNATIVE_CALL_ARG6_GREG)
3549 IEMNATIVE_CALL_ARG6_GREG,
3550# if defined(IEMNATIVE_CALL_ARG7_GREG)
3551 IEMNATIVE_CALL_ARG7_GREG,
3552# endif
3553# endif
3554# endif
3555#endif
3556};
3557AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3558
3559/**
3560 * Call register masks indexed by argument count.
3561 */
3562DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3563{
3564 0,
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3566 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3567 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3568 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3569 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3570#if defined(IEMNATIVE_CALL_ARG4_GREG)
3571 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3572 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3573# if defined(IEMNATIVE_CALL_ARG5_GREG)
3574 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3575 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3576# if defined(IEMNATIVE_CALL_ARG6_GREG)
3577 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3578 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3579 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3580# if defined(IEMNATIVE_CALL_ARG7_GREG)
3581 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3582 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3583 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3584# endif
3585# endif
3586# endif
3587#endif
3588};
3589
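/*
 * Illustrative sketch (not part of the original file): how the two tables above are meant
 * to be indexed.  g_aidxIemNativeCallRegs maps an argument number to its host register,
 * while g_afIemNativeCallRegs gives the mask of all registers used by the first N
 * arguments (handy when checking or flushing before a call).
 */
#if 0 /* example only */
static void iemNativeExampleDumpCallRegs(uint8_t cArgs)
{
    uint32_t const fArgRegs = g_afIemNativeCallRegs[RT_MIN(cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
    for (uint8_t iArg = 0; iArg < cArgs && iArg < IEMNATIVE_CALL_ARG_GREG_COUNT; iArg++)
    {
        uint8_t const idxReg = g_aidxIemNativeCallRegs[iArg];
        Assert(fArgRegs & RT_BIT_32(idxReg));
        Log12(("call arg #%u -> host reg %u\n", iArg, idxReg));
    }
}
#endif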
3590#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3591/**
3592 * BP offset of the stack argument slots.
3593 *
3594 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3595 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3596 */
3597DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3598{
3599 IEMNATIVE_FP_OFF_STACK_ARG0,
3600# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3601 IEMNATIVE_FP_OFF_STACK_ARG1,
3602# endif
3603# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3604 IEMNATIVE_FP_OFF_STACK_ARG2,
3605# endif
3606# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3607 IEMNATIVE_FP_OFF_STACK_ARG3,
3608# endif
3609};
3610AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3611#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3612
3613/**
3614 * Info about shadowed guest register values.
3615 * @see IEMNATIVEGSTREG
3616 */
3617DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3618{
3619#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3620 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3621 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3622 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3623 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3624 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3625 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3626 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3627 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3628 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3629 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3630 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3631 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3632 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3633 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3634 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3635 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3636 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3637 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3638 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3639 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3640 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3641 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3642 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3643 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3644 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3645 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3646 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3647 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3648 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3649 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3650 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3651 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3652 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3653 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3654 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3655 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3656 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3657 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3658 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3659 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3660 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3661 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3662 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3663 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3664 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3665 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3666 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3667 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3668#undef CPUMCTX_OFF_AND_SIZE
3669};
3670AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3671
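/*
 * Illustrative sketch (not part of the original file): the shadow info table above gives,
 * for each IEMNATIVEGSTREG, the CPUMCTX field location/size and a name used for logging.
 * Only the cb and pszName members, which are referenced later in this file, are used here;
 * the exact structure layout lives in the recompiler header.
 */
#if 0 /* example only */
static void iemNativeExampleDumpGstShadowInfo(void)
{
    for (unsigned i = 0; i < RT_ELEMENTS(g_aGstShadowInfo); i++)
        Log12(("gst reg %u: %s (%u bytes)\n", i, g_aGstShadowInfo[i].pszName, g_aGstShadowInfo[i].cb));
}
#endif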
3672
3673/** Host CPU general purpose register names. */
3674DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3675{
3676#ifdef RT_ARCH_AMD64
3677 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3678#elif defined(RT_ARCH_ARM64)
3679 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3680 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3681#else
3682# error "port me"
3683#endif
3684};
3685
3686
3687#if 0 /* unused */
3688/**
3689 * Tries to locate a suitable register in the given register mask.
3690 *
3691 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3692 * failed.
3693 *
3694 * @returns Host register number on success, returns UINT8_MAX on failure.
3695 */
3696static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3697{
3698 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3699 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3700 if (fRegs)
3701 {
3702 /** @todo pick better here: */
3703 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3704
3705 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3706 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3707 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3708 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3709
3710 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3711 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3712 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3713 return idxReg;
3714 }
3715 return UINT8_MAX;
3716}
3717#endif /* unused */
3718
3719
3720/**
3721 * Locate a register, possibly freeing one up.
3722 *
3723 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3724 * failed.
3725 *
3726 * @returns Host register number on success. Returns UINT8_MAX if no registers
3727 *          found, the caller is supposed to deal with this and raise an
3728 *          allocation type specific status code (if desired).
3729 *
3730 * @throws  VBox status code if we run into trouble spilling a variable or
3731 *          recording debug info.  Does NOT throw anything if we're out of
3732 * registers, though.
3733 */
3734static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3735 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3736{
3737 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3738 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3739 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3740
3741 /*
3742 * Try a freed register that's shadowing a guest register.
3743 */
3744 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3745 if (fRegs)
3746 {
3747 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3748
3749#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3750 /*
3751     * When we have liveness information, we use it to kick out all shadowed
3752     * guest registers that will not be needed any more in this TB.  If we're
3753 * lucky, this may prevent us from ending up here again.
3754 *
3755 * Note! We must consider the previous entry here so we don't free
3756 * anything that the current threaded function requires (current
3757 * entry is produced by the next threaded function).
3758 */
3759 uint32_t const idxCurCall = pReNative->idxCurCall;
3760 if (idxCurCall > 0)
3761 {
3762 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3763
3764# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3765 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3766 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3767            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3768# else
3769 /* Construct a mask of the registers not in the read or write state.
3770               Note! We could skip writes, if they aren't from us, as this is just
3771 a hack to prevent trashing registers that have just been written
3772 or will be written when we retire the current instruction. */
3773 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3774 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3775 & IEMLIVENESSBIT_MASK;
3776# endif
3777 /* Merge EFLAGS. */
3778 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3779 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3780 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3781 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3782 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3783
3784 /* If it matches any shadowed registers. */
3785 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3786 {
3787 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3788 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3789 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3790
3791 /* See if we've got any unshadowed registers we can return now. */
3792 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3793 if (fUnshadowedRegs)
3794 {
3795 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3796 return (fPreferVolatile
3797 ? ASMBitFirstSetU32(fUnshadowedRegs)
3798 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3799 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3800 - 1;
3801 }
3802 }
3803 }
3804#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3805
3806 unsigned const idxReg = (fPreferVolatile
3807 ? ASMBitFirstSetU32(fRegs)
3808 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3809 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3810 - 1;
3811
3812 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3813 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3814 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3815 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3816
3817 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3818 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3819 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3820 return idxReg;
3821 }
3822
3823 /*
3824 * Try free up a variable that's in a register.
3825 *
3826 * We do two rounds here, first evacuating variables we don't need to be
3827 * saved on the stack, then in the second round move things to the stack.
3828 */
3829 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3830 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3831 {
3832 uint32_t fVars = pReNative->Core.bmVars;
3833 while (fVars)
3834 {
3835 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3836 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3837 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3838 && (RT_BIT_32(idxReg) & fRegMask)
3839 && ( iLoop == 0
3840 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3841 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3842 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3843 {
3844 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3845 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3846 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3847 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3848 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3849 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3850
3851 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3852 {
3853 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3854 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3855 }
3856
3857 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3858 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3859
3860 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3861 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3862 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3863 return idxReg;
3864 }
3865 fVars &= ~RT_BIT_32(idxVar);
3866 }
3867 }
3868
3869 return UINT8_MAX;
3870}
3871
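/*
 * Illustrative note (not part of the original file): the register-picking expressions in
 * iemNativeRegAllocFindFree above take the lowest set bit of fRegs when fPreferVolatile is
 * set, and otherwise take the highest set bit, restricted to the non-volatile subset when
 * that subset is non-empty.  With hypothetical values fRegs = 0x000000f0 and a volatile
 * mask of 0x0000000c: the fPreferVolatile path picks register 4 (lowest set bit of fRegs),
 * while the other path picks register 7 (highest set bit of fRegs & ~volatile-mask, which
 * here equals fRegs).  The real IEMNATIVE_CALL_VOLATILE_GREG_MASK value is host specific.
 */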
3872
3873/**
3874 * Reassigns a variable to a different register specified by the caller.
3875 *
3876 * @returns The new code buffer position.
3877 * @param pReNative The native recompile state.
3878 * @param off The current code buffer position.
3879 * @param idxVar The variable index.
3880 * @param idxRegOld The old host register number.
3881 * @param idxRegNew The new host register number.
3882 * @param pszCaller The caller for logging.
3883 */
3884static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3885 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3886{
3887 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3888 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3889 RT_NOREF(pszCaller);
3890
3891 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3892
3893 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3894 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3895 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3896 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3897
3898 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3899 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3900 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3901 if (fGstRegShadows)
3902 {
3903 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3904 | RT_BIT_32(idxRegNew);
3905 while (fGstRegShadows)
3906 {
3907 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3908 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3909
3910 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3911 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3912 }
3913 }
3914
3915 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3916 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3917 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3918 return off;
3919}
3920
3921
3922/**
3923 * Moves a variable to a different register or spills it onto the stack.
3924 *
3925 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3926 * kinds can easily be recreated if needed later.
3927 *
3928 * @returns The new code buffer position.
3929 * @param pReNative The native recompile state.
3930 * @param off The current code buffer position.
3931 * @param idxVar The variable index.
3932 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3933 * call-volatile registers.
3934 */
3935DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3936 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3937{
3938 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3939 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3940 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3941 Assert(!pVar->fRegAcquired);
3942
3943 uint8_t const idxRegOld = pVar->idxReg;
3944 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3945 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3946 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3947 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3948 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3949 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3950 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3951 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3952
3953
3954 /** @todo Add statistics on this.*/
3955 /** @todo Implement basic variable liveness analysis (python) so variables
3956     * can be freed immediately once no longer used.  Without this we risk
3957     * trashing registers and stack slots on dead variables.
3958 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3959
3960 /*
3961 * First try move it to a different register, as that's cheaper.
3962 */
3963 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3964 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3965 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3966 if (fRegs)
3967 {
3968 /* Avoid using shadow registers, if possible. */
3969 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3970 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3971 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3972 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3973 }
3974
3975 /*
3976 * Otherwise we must spill the register onto the stack.
3977 */
3978 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3979 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3980 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3981 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3982
3983 pVar->idxReg = UINT8_MAX;
3984 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3985 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3986 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3987 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3988 return off;
3989}
3990
3991
3992/**
3993 * Allocates a temporary host general purpose register.
3994 *
3995 * This may emit code to save register content onto the stack in order to free
3996 * up a register.
3997 *
3998 * @returns The host register number; throws VBox status code on failure,
3999 * so no need to check the return value.
4000 * @param pReNative The native recompile state.
4001 * @param poff Pointer to the variable with the code buffer position.
4002 *                      This will be updated if we need to move a variable from
4003 * register to stack in order to satisfy the request.
4004 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4005 * registers (@c true, default) or the other way around
4006 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4007 */
4008DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4009{
4010 /*
4011     * Try to find a completely unused register, preferably a call-volatile one.
4012 */
4013 uint8_t idxReg;
4014 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4015 & ~pReNative->Core.bmHstRegsWithGstShadow
4016 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4017 if (fRegs)
4018 {
4019 if (fPreferVolatile)
4020 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4021 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4022 else
4023 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4024 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4025 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4026 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4027 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4028 }
4029 else
4030 {
4031 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4032 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4033 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4034 }
4035 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4036}
4037
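/*
 * Illustrative sketch (not part of the original file): typical temporary register usage.
 * iemNativeRegFreeTmp() is assumed to be the matching release helper declared elsewhere in
 * the recompiler; only iemNativeRegAllocTmp() above is taken from this file.
 */
#if 0 /* example only */
static uint32_t iemNativeExampleUseTmpReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* May emit spill code and thus update 'off'. */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);

    /* ... emit code that clobbers idxTmpReg here ... */

    iemNativeRegFreeTmp(pReNative, idxTmpReg);  /* assumed release helper */
    return off;
}
#endif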
4038
4039/**
4040 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4041 * registers.
4042 *
4043 * @returns The host register number; throws VBox status code on failure,
4044 * so no need to check the return value.
4045 * @param pReNative The native recompile state.
4046 * @param poff Pointer to the variable with the code buffer position.
4047 *                      This will be updated if we need to move a variable from
4048 * register to stack in order to satisfy the request.
4049 * @param fRegMask Mask of acceptable registers.
4050 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4051 * registers (@c true, default) or the other way around
4052 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4053 */
4054DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4055 bool fPreferVolatile /*= true*/)
4056{
4057 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4058 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4059
4060 /*
4061     * Try to find a completely unused register, preferably a call-volatile one.
4062 */
4063 uint8_t idxReg;
4064 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4065 & ~pReNative->Core.bmHstRegsWithGstShadow
4066 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4067 & fRegMask;
4068 if (fRegs)
4069 {
4070 if (fPreferVolatile)
4071 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4072 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4073 else
4074 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4075 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4076 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4077 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4078 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4079 }
4080 else
4081 {
4082 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4083 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4084 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4085 }
4086 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4087}
4088
4089
4090/**
4091 * Allocates a temporary register for loading an immediate value into.
4092 *
4093 * This will emit code to load the immediate, unless there happens to be an
4094 * unused register with the value already loaded.
4095 *
4096 * The caller must not modify the returned register; it must be considered
4097 * read-only. Free using iemNativeRegFreeTmpImm.
4098 *
4099 * @returns The host register number; throws VBox status code on failure, so no
4100 * need to check the return value.
4101 * @param pReNative The native recompile state.
4102 * @param poff Pointer to the variable with the code buffer position.
4103 * @param uImm The immediate value that the register must hold upon
4104 * return.
4105 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4106 * registers (@c true, default) or the other way around
4107 * (@c false).
4108 *
4109 * @note Reusing immediate values has not been implemented yet.
4110 */
4111DECL_HIDDEN_THROW(uint8_t)
4112iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4113{
4114 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4115 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4116 return idxReg;
4117}
4118
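/*
 * Illustrative sketch (not part of the original file): loading a constant into a scratch
 * register.  The returned register must be treated as read-only and released with
 * iemNativeRegFreeTmpImm() as the doc comment above notes; the immediate value here is
 * just a placeholder.
 */
#if 0 /* example only */
static uint32_t iemNativeExampleUseImmReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xdeadbeefcafe));

    /* ... emit code that reads (but never writes) idxRegImm ... */

    iemNativeRegFreeTmpImm(pReNative, idxRegImm);
    return off;
}
#endif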
4119
4120/**
4121 * Allocates a temporary host general purpose register for keeping a guest
4122 * register value.
4123 *
4124 * Since we may already have a register holding the guest register value,
4125 * code will be emitted to do the loading if that's not the case. Code may also
4126 * be emitted if we have to free up a register to satisfy the request.
4127 *
4128 * @returns The host register number; throws VBox status code on failure, so no
4129 * need to check the return value.
4130 * @param pReNative The native recompile state.
4131 * @param poff Pointer to the variable with the code buffer
4132 *                          position.  This will be updated if we need to move a
4133 *                          variable from register to stack in order to satisfy
4134 *                          the request.
4135 * @param   enmGstReg       The guest register that is to be updated.
4136 * @param enmIntendedUse How the caller will be using the host register.
4137 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4138 * register is okay (default). The ASSUMPTION here is
4139 * that the caller has already flushed all volatile
4140 * registers, so this is only applied if we allocate a
4141 * new register.
4142 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4143 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4144 */
4145DECL_HIDDEN_THROW(uint8_t)
4146iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4147 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4148 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4149{
4150 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4151#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4152 AssertMsg( fSkipLivenessAssert
4153 || pReNative->idxCurCall == 0
4154 || enmGstReg == kIemNativeGstReg_Pc
4155 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4156 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4157 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4158 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4159 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4160 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4161#endif
4162 RT_NOREF(fSkipLivenessAssert);
4163#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4164 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4165#endif
4166 uint32_t const fRegMask = !fNoVolatileRegs
4167 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4168 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4169
4170 /*
4171 * First check if the guest register value is already in a host register.
4172 */
4173 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4174 {
4175 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4176 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4177 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4178 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4179
4180 /* It's not supposed to be allocated... */
4181 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4182 {
4183 /*
4184 * If the register will trash the guest shadow copy, try find a
4185 * completely unused register we can use instead. If that fails,
4186 * we need to disassociate the host reg from the guest reg.
4187 */
4188 /** @todo would be nice to know if preserving the register is in any way helpful. */
4189            /* If the purpose is calculations, try to duplicate the register value as
4190 we'll be clobbering the shadow. */
4191 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4192 && ( ~pReNative->Core.bmHstRegs
4193 & ~pReNative->Core.bmHstRegsWithGstShadow
4194 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4195 {
4196 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4197
4198 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4199
4200 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4201 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4202 g_apszIemNativeHstRegNames[idxRegNew]));
4203 idxReg = idxRegNew;
4204 }
4205 /* If the current register matches the restrictions, go ahead and allocate
4206 it for the caller. */
4207 else if (fRegMask & RT_BIT_32(idxReg))
4208 {
4209 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4210 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4211 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4212 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4213 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4214 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4215 else
4216 {
4217 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4218 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4219 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4220 }
4221 }
4222 /* Otherwise, allocate a register that satisfies the caller and transfer
4223 the shadowing if compatible with the intended use. (This basically
4224               means the caller wants a non-volatile register (RSP push/pop scenario).) */
4225 else
4226 {
4227 Assert(fNoVolatileRegs);
4228 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4229 !fNoVolatileRegs
4230 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4231 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4232 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4233 {
4234 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4235                    Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4236 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4237 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4238 }
4239 else
4240 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4241 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4242 g_apszIemNativeHstRegNames[idxRegNew]));
4243 idxReg = idxRegNew;
4244 }
4245 }
4246 else
4247 {
4248 /*
4249 * Oops. Shadowed guest register already allocated!
4250 *
4251 * Allocate a new register, copy the value and, if updating, the
4252 * guest shadow copy assignment to the new register.
4253 */
4254 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4255 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4256 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4257 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4258
4259 /** @todo share register for readonly access. */
4260 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4261 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4262
4263 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4264 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4265
4266 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4267 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4268 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4269 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4270 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4271 else
4272 {
4273 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4274 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4275 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4276 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4277 }
4278 idxReg = idxRegNew;
4279 }
4280 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4281
4282#ifdef VBOX_STRICT
4283 /* Strict builds: Check that the value is correct. */
4284 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4285#endif
4286
4287 return idxReg;
4288 }
4289
4290 /*
4291     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4292 */
4293 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4294
4295 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4296 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4297
4298 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4299 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4300 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4301 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4302
4303 return idxRegNew;
4304}
4305
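/*
 * Illustrative sketch (not part of the original file): fetching a guest GPR for update.
 * The allocator above reuses an existing shadow copy when possible and only emits a load
 * otherwise.  kIemNativeGstReg_GprFirst and X86_GREG_xAX are taken from the shadow info
 * table comments earlier in this file; iemNativeRegFreeTmp() is assumed to be the matching
 * release helper declared elsewhere, and the write-back to CPUMCTX (not shown) is left to
 * the caller's emitters.
 */
#if 0 /* example only */
static uint32_t iemNativeExampleUpdateRax(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                              (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                              kIemNativeGstRegUse_ForUpdate);

    /* ... emit code modifying idxGstReg; typically followed by a store back to CPUMCTX ... */

    iemNativeRegFreeTmp(pReNative, idxGstReg);  /* assumed release helper */
    return off;
}
#endif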
4306
4307/**
4308 * Allocates a temporary host general purpose register that already holds the
4309 * given guest register value.
4310 *
4311 * The use case for this function is places where the shadowing state cannot be
4312 * modified due to branching and such.  This will fail if we don't have a
4313 * current shadow copy handy or if it's incompatible. The only code that will
4314 * be emitted here is value checking code in strict builds.
4315 *
4316 * The intended use can only be readonly!
4317 *
4318 * @returns The host register number, UINT8_MAX if not present.
4319 * @param pReNative The native recompile state.
4320 * @param poff Pointer to the instruction buffer offset.
4321 * Will be updated in strict builds if a register is
4322 * found.
4323 * @param   enmGstReg   The guest register that is to be read.
4324 * @note In strict builds, this may throw instruction buffer growth failures.
4325 * Non-strict builds will not throw anything.
4326 * @sa iemNativeRegAllocTmpForGuestReg
4327 */
4328DECL_HIDDEN_THROW(uint8_t)
4329iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4330{
4331 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4332#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4333 AssertMsg( pReNative->idxCurCall == 0
4334 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4335 || enmGstReg == kIemNativeGstReg_Pc,
4336 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4337#endif
4338
4339 /*
4340 * First check if the guest register value is already in a host register.
4341 */
4342 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4343 {
4344 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4345 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4346 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4347 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4348
4349 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4350 {
4351 /*
4352 * We only do readonly use here, so easy compared to the other
4353 * variant of this code.
4354 */
4355 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4356 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4357 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4358 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4359 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4360
4361#ifdef VBOX_STRICT
4362 /* Strict builds: Check that the value is correct. */
4363 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4364#else
4365 RT_NOREF(poff);
4366#endif
4367 return idxReg;
4368 }
4369 }
4370
4371 return UINT8_MAX;
4372}
4373
4374
4375/**
4376 * Allocates argument registers for a function call.
4377 *
4378 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4379 * need to check the return value.
4380 * @param pReNative The native recompile state.
4381 * @param off The current code buffer offset.
4382 * @param cArgs The number of arguments the function call takes.
4383 */
4384DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4385{
4386 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4387 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4388 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4389 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4390
4391 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4392 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4393 else if (cArgs == 0)
4394        return off;
4395
4396 /*
4397     * Do we get lucky and all the registers are free and not shadowing anything?
4398 */
4399 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4400 for (uint32_t i = 0; i < cArgs; i++)
4401 {
4402 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4403 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4404 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4405 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4406 }
4407 /*
4408 * Okay, not lucky so we have to free up the registers.
4409 */
4410 else
4411 for (uint32_t i = 0; i < cArgs; i++)
4412 {
4413 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4414 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4415 {
4416 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4417 {
4418 case kIemNativeWhat_Var:
4419 {
4420 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4421 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4422 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4423 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4424 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4425
4426 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4427 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4428 else
4429 {
4430 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4431 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4432 }
4433 break;
4434 }
4435
4436 case kIemNativeWhat_Tmp:
4437 case kIemNativeWhat_Arg:
4438 case kIemNativeWhat_rc:
4439 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4440 default:
4441 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4442 }
4443
4444 }
4445 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4446 {
4447 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4448 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4449 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4450 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4451 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4452 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4453 }
4454 else
4455 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4456 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4457 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4458 }
4459 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4460    return off;
4461}
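
/*
 * Usage sketch (illustrative only): reserving the first two argument GPRs
 * ahead of a helper call.  The emitters that actually load the argument
 * registers (g_aidxIemNativeCallRegs[0..1]) live elsewhere in this file.
 */
#if 0
    off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
    /* ... emit code loading g_aidxIemNativeCallRegs[0] and [1] with the arguments ... */
#endif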
4462
4463
4464DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4465
4466
4467#if 0
4468/**
4469 * Frees a register assignment of any type.
4470 *
4471 * @param pReNative The native recompile state.
4472 * @param idxHstReg The register to free.
4473 *
4474 * @note Does not update variables.
4475 */
4476DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4477{
4478 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4479 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4480 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4481 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4482 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4483 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4484 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4485 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4486 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4487 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4488 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4489 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4490 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4491 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4492
4493 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4494 /* no flushing, right:
4495 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4496 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4497 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4498 */
4499}
4500#endif
4501
4502
4503/**
4504 * Frees a temporary register.
4505 *
4506 * Any shadow copies of guest registers assigned to the host register will not
4507 * be flushed by this operation.
4508 */
4509DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4510{
4511 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4512 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4513 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4514 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4515 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4516}
4517
4518
4519/**
4520 * Frees a temporary immediate register.
4521 *
4522 * It is assumed that the caller has not modified the register, so it still holds
4523 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4524 */
4525DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4526{
4527 iemNativeRegFreeTmp(pReNative, idxHstReg);
4528}
4529
4530
4531/**
4532 * Frees a register assigned to a variable.
4533 *
4534 * The register will be disassociated from the variable.
4535 */
4536DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4537{
4538 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4539 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4540 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4541 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4542 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4543
4544 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4545 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4546 if (!fFlushShadows)
4547 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4548 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4549 else
4550 {
4551 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4552 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4553 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4554 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4555 uint64_t fGstRegShadows = fGstRegShadowsOld;
4556 while (fGstRegShadows)
4557 {
4558 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4559 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4560
4561 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4562 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4563 }
4564 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4565 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4566 }
4567}
4568
4569
4570/**
4571 * Called right before emitting a call instruction to move anything important
4572 * out of call-volatile registers, free and flush the call-volatile registers,
4573 * optionally freeing argument variables.
4574 *
4575 * @returns New code buffer offset; throws VBox status code on failure.
4576 * @param pReNative The native recompile state.
4577 * @param off The code buffer offset.
4578 * @param cArgs The number of arguments the function call takes.
4579 *                      It is presumed that the host register part of these has
4580 * been allocated as such already and won't need moving,
4581 * just freeing.
4582 * @param fKeepVars Mask of variables that should keep their register
4583 * assignments. Caller must take care to handle these.
4584 */
4585DECL_HIDDEN_THROW(uint32_t)
4586iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4587{
4588 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4589
4590 /* fKeepVars will reduce this mask. */
4591 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4592
4593 /*
4594 * Move anything important out of volatile registers.
4595 */
4596 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4597 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4598 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4599#ifdef IEMNATIVE_REG_FIXED_TMP0
4600 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4601#endif
4602#ifdef IEMNATIVE_REG_FIXED_TMP1
4603 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4604#endif
4605#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4606 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4607#endif
4608 & ~g_afIemNativeCallRegs[cArgs];
4609
4610 fRegsToMove &= pReNative->Core.bmHstRegs;
4611 if (!fRegsToMove)
4612 { /* likely */ }
4613 else
4614 {
4615 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4616 while (fRegsToMove != 0)
4617 {
4618 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4619 fRegsToMove &= ~RT_BIT_32(idxReg);
4620
4621 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4622 {
4623 case kIemNativeWhat_Var:
4624 {
4625 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4626 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4627 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4628 Assert(pVar->idxReg == idxReg);
4629 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4630 {
4631 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4632 idxVar, pVar->enmKind, pVar->idxReg));
4633 if (pVar->enmKind != kIemNativeVarKind_Stack)
4634 pVar->idxReg = UINT8_MAX;
4635 else
4636 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4637 }
4638 else
4639 fRegsToFree &= ~RT_BIT_32(idxReg);
4640 continue;
4641 }
4642
4643 case kIemNativeWhat_Arg:
4644 AssertMsgFailed(("What?!?: %u\n", idxReg));
4645 continue;
4646
4647 case kIemNativeWhat_rc:
4648 case kIemNativeWhat_Tmp:
4649 AssertMsgFailed(("Missing free: %u\n", idxReg));
4650 continue;
4651
4652 case kIemNativeWhat_FixedTmp:
4653 case kIemNativeWhat_pVCpuFixed:
4654 case kIemNativeWhat_pCtxFixed:
4655 case kIemNativeWhat_PcShadow:
4656 case kIemNativeWhat_FixedReserved:
4657 case kIemNativeWhat_Invalid:
4658 case kIemNativeWhat_End:
4659 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4660 }
4661 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4662 }
4663 }
4664
4665 /*
4666 * Do the actual freeing.
4667 */
4668 if (pReNative->Core.bmHstRegs & fRegsToFree)
4669 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4670 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4671 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4672
4673 /* If there are guest register shadows in any call-volatile register, we
4674       have to clear the corresponding guest register masks for each register. */
4675 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4676 if (fHstRegsWithGstShadow)
4677 {
4678 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4679 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4680 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4681 do
4682 {
4683 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4684 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4685
4686 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4687 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4688 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4689 } while (fHstRegsWithGstShadow != 0);
4690 }
4691
4692 return off;
4693}
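
/*
 * Call-site sketch (illustrative only) of the usual protocol around emitting
 * a helper call.  iemNativeEmitCallImm and pfnHelper are assumed stand-ins
 * for the call emitter and helper address used by the real call sites; the
 * other functions are defined in this file.
 */
#if 0
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);    /* evacuate + free call-volatile GPRs */
    /* ... load the argument registers ... */
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);            /* assumed call emitter */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif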
4694
4695
4696/**
4697 * Flushes a set of guest register shadow copies.
4698 *
4699 * This is usually done after calling a threaded function or a C-implementation
4700 * of an instruction.
4701 *
4702 * @param pReNative The native recompile state.
4703 * @param fGstRegs Set of guest registers to flush.
4704 */
4705DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4706{
4707 /*
4708 * Reduce the mask by what's currently shadowed
4709 */
4710 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4711 fGstRegs &= bmGstRegShadowsOld;
4712 if (fGstRegs)
4713 {
4714 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4715 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4716 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4717 if (bmGstRegShadowsNew)
4718 {
4719 /*
4720 * Partial.
4721 */
4722 do
4723 {
4724 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4725 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4726 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4727 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4728 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4729
4730 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4731 fGstRegs &= ~fInThisHstReg;
4732 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4733 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4734 if (!fGstRegShadowsNew)
4735 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4736 } while (fGstRegs != 0);
4737 }
4738 else
4739 {
4740 /*
4741 * Clear all.
4742 */
4743 do
4744 {
4745 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4746 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4747 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4748 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4749 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4750
4751 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4752 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4753 } while (fGstRegs != 0);
4754 pReNative->Core.bmHstRegsWithGstShadow = 0;
4755 }
4756 }
4757}
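
/*
 * Usage sketch (illustrative only): the function reduces the mask by what is
 * actually shadowed, so an all-ones mask simply drops every shadow, while a
 * specific mask only drops the named registers.
 */
#if 0
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);                      /* after a call that may change anything */
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));  /* just the PC shadow */
#endif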
4758
4759
4760/**
4761 * Flushes guest register shadow copies held by a set of host registers.
4762 *
4763 * This is used with the TLB lookup code for ensuring that we don't carry on
4764 * with any guest shadows in volatile registers, as these will get corrupted by
4765 * a TLB miss.
4766 *
4767 * @param pReNative The native recompile state.
4768 * @param fHstRegs Set of host registers to flush guest shadows for.
4769 */
4770DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4771{
4772 /*
4773 * Reduce the mask by what's currently shadowed.
4774 */
4775 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4776 fHstRegs &= bmHstRegsWithGstShadowOld;
4777 if (fHstRegs)
4778 {
4779 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4780 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4781 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4782 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4783 if (bmHstRegsWithGstShadowNew)
4784 {
4785 /*
4786 * Partial (likely).
4787 */
4788 uint64_t fGstShadows = 0;
4789 do
4790 {
4791 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4792 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4793 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4794 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4795
4796 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4797 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4798 fHstRegs &= ~RT_BIT_32(idxHstReg);
4799 } while (fHstRegs != 0);
4800 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4801 }
4802 else
4803 {
4804 /*
4805 * Clear all.
4806 */
4807 do
4808 {
4809 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4810 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4811 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4812 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4813
4814 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4815 fHstRegs &= ~RT_BIT_32(idxHstReg);
4816 } while (fHstRegs != 0);
4817 pReNative->Core.bmGstRegShadows = 0;
4818 }
4819 }
4820}
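
/*
 * Usage sketch (illustrative only): before a TLB lookup whose miss path calls
 * a helper, drop guest shadows from all call-volatile host registers so the
 * miss cannot leave stale shadowing behind.
 */
#if 0
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif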
4821
4822
4823/**
4824 * Restores guest shadow copies in volatile registers.
4825 *
4826 * This is used after calling a helper function (think TLB miss) to restore the
4827 * register state of volatile registers.
4828 *
4829 * @param pReNative The native recompile state.
4830 * @param off The code buffer offset.
4831 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4832 * be active (allocated) w/o asserting. Hack.
4833 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4834 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4835 */
4836DECL_HIDDEN_THROW(uint32_t)
4837iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4838{
4839 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4840 if (fHstRegs)
4841 {
4842 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4843 do
4844 {
4845 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4846
4847 /* It's not fatal if a register is active holding a variable that
4848            is shadowing a guest register, ASSUMING all pending guest register
4849 writes were flushed prior to the helper call. However, we'll be
4850            emitting duplicate restores, so it wastes code space. */
4851 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4852 RT_NOREF(fHstRegsActiveShadows);
4853
4854 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4855 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4856 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4857 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4858
4859 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4860 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4861
4862 fHstRegs &= ~RT_BIT_32(idxHstReg);
4863 } while (fHstRegs != 0);
4864 }
4865 return off;
4866}
4867
4868
4869
4870
4871/*********************************************************************************************************************************
4872* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4873*********************************************************************************************************************************/
4874#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4875
4876/**
4877 * Info about shadowed guest SIMD register values.
4878 * @see IEMNATIVEGSTSIMDREG
4879 */
4880static struct
4881{
4882 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4883 uint32_t offXmm;
4884 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4885 uint32_t offYmm;
4886 /** Name (for logging). */
4887 const char *pszName;
4888} const g_aGstSimdShadowInfo[] =
4889{
4890#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4891 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4892 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4893 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4894 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4895 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4896 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4897 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4898 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4899 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4900 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4901 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4902 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4903 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4904 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4905 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4906 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4907 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4908#undef CPUMCTX_OFF_AND_SIZE
4909};
4910AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4911
4912
4913#ifdef LOG_ENABLED
4914/** Host CPU SIMD register names. */
4915DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4916{
4917#ifdef RT_ARCH_AMD64
4918 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4919#elif defined(RT_ARCH_ARM64)
4920 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4921 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4922#else
4923# error "port me"
4924#endif
4925};
4926#endif
4927
4928
4929/**
4930 * Frees a temporary SIMD register.
4931 *
4932 * Any shadow copies of guest registers assigned to the host register will not
4933 * be flushed by this operation.
4934 */
4935DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4936{
4937 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4938 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4939 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4940 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4941 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4942}
4943
4944
4945/**
4946 * Emits code to flush a pending write of the given guest SIMD register, if any, clearing its dirty state afterwards.
4947 *
4948 * @returns New code buffer offset.
4949 * @param pReNative The native recompile state.
4950 * @param off Current code buffer position.
4951 * @param enmGstSimdReg The guest SIMD register to flush.
4952 */
4953DECL_HIDDEN_THROW(uint32_t)
4954iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4955{
4956 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4957
4958 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4959 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4960 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4961 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4962
4963 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4964 {
4965 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4966 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4967 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4968 }
4969
4970 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4971 {
4972 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4973 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4974 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4975 }
4976
4977 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4978 return off;
4979}
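
/*
 * Usage sketch (illustrative only): write back a dirty guest ymm0 shadow
 * before emitting code that accesses the CPUMCTX copy directly.
 */
#if 0
    off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
#endif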
4980
4981
4982/**
4983 * Locate a register, possibly freeing one up.
4984 *
4985 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4986 * failed.
4987 *
4988 * @returns Host register number on success. Returns UINT8_MAX if no registers
4989 *          found, the caller is supposed to deal with this and raise an
4990 * allocation type specific status code (if desired).
4991 *
4992 * @throws  VBox status code if we run into trouble spilling a variable or
4993 * recording debug info. Does NOT throw anything if we're out of
4994 * registers, though.
4995 */
4996static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4997 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4998{
4999 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5000 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5001 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5002
5003 /*
5004 * Try a freed register that's shadowing a guest register.
5005 */
5006 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5007 if (fRegs)
5008 {
5009 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5010
5011#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5012 /*
5013         * When we have liveness information, we use it to kick out all shadowed
5014         * guest registers that will not be needed any more in this TB. If we're
5015 * lucky, this may prevent us from ending up here again.
5016 *
5017 * Note! We must consider the previous entry here so we don't free
5018 * anything that the current threaded function requires (current
5019 * entry is produced by the next threaded function).
5020 */
5021 uint32_t const idxCurCall = pReNative->idxCurCall;
5022 if (idxCurCall > 0)
5023 {
5024 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5025
5026# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5027 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5028 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5029 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5030#else
5031 /* Construct a mask of the registers not in the read or write state.
5032            Note! We could skip writes, if they aren't from us, as this is just
5033 a hack to prevent trashing registers that have just been written
5034 or will be written when we retire the current instruction. */
5035 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5036 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5037 & IEMLIVENESSBIT_MASK;
5038#endif
5039 /* If it matches any shadowed registers. */
5040 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5041 {
5042 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5043 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5044 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5045
5046 /* See if we've got any unshadowed registers we can return now. */
5047 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5048 if (fUnshadowedRegs)
5049 {
5050 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5051 return (fPreferVolatile
5052 ? ASMBitFirstSetU32(fUnshadowedRegs)
5053 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5054 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5055 - 1;
5056 }
5057 }
5058 }
5059#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5060
5061 unsigned const idxReg = (fPreferVolatile
5062 ? ASMBitFirstSetU32(fRegs)
5063 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5064 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5065 - 1;
5066
5067 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5068 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5069 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5070 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5071
5072 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5073 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5074 uint32_t idxGstSimdReg = 0;
5075 do
5076 {
5077 if (fGstRegShadows & 0x1)
5078 {
5079 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5080 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5081 }
5082 idxGstSimdReg++;
5083 fGstRegShadows >>= 1;
5084 } while (fGstRegShadows);
5085
5086 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5087 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5088 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5089 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5090 return idxReg;
5091 }
5092
5093 /*
5094 * Try free up a variable that's in a register.
5095 *
5096     * We do two rounds here, first evacuating variables that don't need to be
5097     * saved on the stack, then in the second round moving things to the stack.
5098 */
5099 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5100 AssertReleaseFailed(); /** @todo No variable support right now. */
5101#if 0
5102 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5103 {
5104 uint32_t fVars = pReNative->Core.bmSimdVars;
5105 while (fVars)
5106 {
5107 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5108 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5109 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5110 && (RT_BIT_32(idxReg) & fRegMask)
5111 && ( iLoop == 0
5112 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5113 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5114 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5115 {
5116 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5117 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5118 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5119 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5120 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5121 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5122
5123 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5124 {
5125 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5126 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5127 }
5128
5129 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5130 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5131
5132 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5133 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5134 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5135 return idxReg;
5136 }
5137 fVars &= ~RT_BIT_32(idxVar);
5138 }
5139 }
5140#endif
5141
5142 AssertFailed();
5143 return UINT8_MAX;
5144}
5145
5146
5147/**
5148 * Flushes a set of guest register shadow copies.
5149 *
5150 * This is usually done after calling a threaded function or a C-implementation
5151 * of an instruction.
5152 *
5153 * @param pReNative The native recompile state.
5154 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5155 */
5156DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5157{
5158 /*
5159 * Reduce the mask by what's currently shadowed
5160 */
5161 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5162 fGstSimdRegs &= bmGstSimdRegShadows;
5163 if (fGstSimdRegs)
5164 {
5165 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5166 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5167 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5168 if (bmGstSimdRegShadowsNew)
5169 {
5170 /*
5171 * Partial.
5172 */
5173 do
5174 {
5175 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5176 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5177 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5178 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5179 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5180 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5181
5182 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5183 fGstSimdRegs &= ~fInThisHstReg;
5184 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5185 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5186 if (!fGstRegShadowsNew)
5187 {
5188 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5189 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5190 }
5191 } while (fGstSimdRegs != 0);
5192 }
5193 else
5194 {
5195 /*
5196 * Clear all.
5197 */
5198 do
5199 {
5200 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5201 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5202 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5203 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5204 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5205 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5206
5207 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5208 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5209 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5210 } while (fGstSimdRegs != 0);
5211 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5212 }
5213 }
5214}
5215
5216
5217/**
5218 * Allocates a temporary host SIMD register.
5219 *
5220 * This may emit code to save register content onto the stack in order to free
5221 * up a register.
5222 *
5223 * @returns The host register number; throws VBox status code on failure,
5224 * so no need to check the return value.
5225 * @param pReNative The native recompile state.
5226 * @param poff Pointer to the variable with the code buffer position.
5227 *                      This will be updated if we need to move a variable from
5228 * register to stack in order to satisfy the request.
5229 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5230 * registers (@c true, default) or the other way around
5231 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5232 */
5233DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5234{
5235 /*
5236 * Try find a completely unused register, preferably a call-volatile one.
5237 */
5238 uint8_t idxSimdReg;
5239    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5240                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5241 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5242 if (fRegs)
5243 {
5244 if (fPreferVolatile)
5245 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5246 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5247 else
5248 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5249 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5250 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5251 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5252 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5253 }
5254 else
5255 {
5256 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5257 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5258 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5259 }
5260
5261 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5262 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5263}
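
/*
 * Usage sketch (illustrative only): grab a scratch SIMD register, emit code
 * using it, and release it again.
 */
#if 0
    uint8_t const idxTmpSimdReg = iemNativeSimdRegAllocTmp(pReNative, &off);
    /* ... emit SIMD code using idxTmpSimdReg as scratch ... */
    iemNativeSimdRegFreeTmp(pReNative, idxTmpSimdReg);
#endif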
5264
5265
5266/**
5267 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5268 * registers.
5269 *
5270 * @returns The host register number; throws VBox status code on failure,
5271 * so no need to check the return value.
5272 * @param pReNative The native recompile state.
5273 * @param poff Pointer to the variable with the code buffer position.
5274 *                      This will be updated if we need to move a variable from
5275 * register to stack in order to satisfy the request.
5276 * @param fRegMask Mask of acceptable registers.
5277 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5278 * registers (@c true, default) or the other way around
5279 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5280 */
5281DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5282 bool fPreferVolatile /*= true*/)
5283{
5284 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5285 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5286
5287 /*
5288 * Try find a completely unused register, preferably a call-volatile one.
5289 */
5290 uint8_t idxSimdReg;
5291 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5292 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5293 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5294 & fRegMask;
5295 if (fRegs)
5296 {
5297 if (fPreferVolatile)
5298 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5299 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5300 else
5301 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5302 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5303 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5304 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5305 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5306 }
5307 else
5308 {
5309 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5310 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5311 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5312 }
5313
5314 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5315 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5316}
5317
5318
5319/**
5320 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5321 *
5322 * @param pReNative The native recompile state.
5323 * @param idxHstSimdReg The host SIMD register to update the state for.
5324 * @param enmLoadSz The load size to set.
5325 */
5326DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5327 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5328{
5329 /* Everything valid already? -> nothing to do. */
5330 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5331 return;
5332
5333 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5334 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5335 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5336 {
5337 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5338 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5339 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5340 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5341 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5342 }
5343}
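
/*
 * Example of the merge behaviour above: a register that currently has only the
 * low 128 bits marked valid (Low128) and is then marked with High128 ends up
 * in the kIemNativeGstSimdRegLdStSz_256 state, while re-marking it with a size
 * it already covers is a no-op.
 */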
5344
5345
5346static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5347 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5348{
5349 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5350 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5351 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5352 {
5353# ifdef RT_ARCH_ARM64
5354 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5355 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5356# endif
5357
5358 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5359 {
5360 switch (enmLoadSzDst)
5361 {
5362 case kIemNativeGstSimdRegLdStSz_256:
5363 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5364 break;
5365 case kIemNativeGstSimdRegLdStSz_Low128:
5366 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5367 break;
5368 case kIemNativeGstSimdRegLdStSz_High128:
5369 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5370 break;
5371 default:
5372 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5373 }
5374
5375 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5376 }
5377 }
5378 else
5379 {
5380 /* Complicated stuff where the source is currently missing something, later. */
5381 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5382 }
5383
5384 return off;
5385}
5386
5387
5388/**
5389 * Allocates a temporary host SIMD register for keeping a guest
5390 * SIMD register value.
5391 *
5392 * Since we may already have a register holding the guest register value,
5393 * code will be emitted to do the loading if that's not the case. Code may also
5394 * be emitted if we have to free up a register to satisfy the request.
5395 *
5396 * @returns The host register number; throws VBox status code on failure, so no
5397 * need to check the return value.
5398 * @param pReNative The native recompile state.
5399 * @param poff Pointer to the variable with the code buffer
5400 *                          position. This will be updated if we need to move a
5401 * variable from register to stack in order to satisfy
5402 * the request.
5403 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
 * @param   enmLoadSz       Which part of the register needs to be valid when
 *                          loading (low/high 128 bits or the full 256 bits).
5404 * @param enmIntendedUse How the caller will be using the host register.
5405 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5406 * register is okay (default). The ASSUMPTION here is
5407 * that the caller has already flushed all volatile
5408 * registers, so this is only applied if we allocate a
5409 * new register.
5410 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5411 */
5412DECL_HIDDEN_THROW(uint8_t)
5413iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5414 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5415 bool fNoVolatileRegs /*= false*/)
5416{
5417 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5418#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5419 AssertMsg( pReNative->idxCurCall == 0
5420 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5421 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5422 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5423 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5424 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5425 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5426#endif
5427#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5428 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5429#endif
5430 uint32_t const fRegMask = !fNoVolatileRegs
5431 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5432 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5433
5434 /*
5435 * First check if the guest register value is already in a host register.
5436 */
5437 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5438 {
5439 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5440 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5441 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5442 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5443
5444 /* It's not supposed to be allocated... */
5445 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5446 {
5447 /*
5448 * If the register will trash the guest shadow copy, try find a
5449 * completely unused register we can use instead. If that fails,
5450 * we need to disassociate the host reg from the guest reg.
5451 */
5452 /** @todo would be nice to know if preserving the register is in any way helpful. */
5453            /* If the purpose is calculations, try to duplicate the register value as
5454 we'll be clobbering the shadow. */
5455 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5456 && ( ~pReNative->Core.bmHstSimdRegs
5457 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5458 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5459 {
5460 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5461
5462 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5463
5464 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5465 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5466 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5467 idxSimdReg = idxRegNew;
5468 }
5469 /* If the current register matches the restrictions, go ahead and allocate
5470 it for the caller. */
5471 else if (fRegMask & RT_BIT_32(idxSimdReg))
5472 {
5473 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5474 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5475 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5476 {
5477 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5478 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5479 else
5480 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5481 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5482 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5483 }
5484 else
5485 {
5486 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5487 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5488 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5489 }
5490 }
5491 /* Otherwise, allocate a register that satisfies the caller and transfer
5492 the shadowing if compatible with the intended use. (This basically
5493 means the call wants a non-volatile register (RSP push/pop scenario).) */
5494 else
5495 {
5496 Assert(fNoVolatileRegs);
5497 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5498 !fNoVolatileRegs
5499 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5500 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5501 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5502 {
5503 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5504                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5505 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5506 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5507 }
5508 else
5509 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5510 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5511 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5512 idxSimdReg = idxRegNew;
5513 }
5514 }
5515 else
5516 {
5517 /*
5518 * Oops. Shadowed guest register already allocated!
5519 *
5520 * Allocate a new register, copy the value and, if updating, the
5521 * guest shadow copy assignment to the new register.
5522 */
5523 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5524 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5525 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5526 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5527
5528 /** @todo share register for readonly access. */
5529 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5530 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5531
5532 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5533 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5534 else
5535 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5536
5537 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5538 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5539 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5540 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5541 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5542 else
5543 {
5544 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5545 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5546 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5547 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5548 }
5549 idxSimdReg = idxRegNew;
5550 }
5551 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5552
5553#ifdef VBOX_STRICT
5554 /* Strict builds: Check that the value is correct. */
5555 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5556 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5557#endif
5558
5559 return idxSimdReg;
5560 }
5561
5562 /*
5563     * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5564 */
5565 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5566
5567 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5568 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5569 else
5570 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5571
5572 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5573 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5574
5575    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5576 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5577
5578 return idxRegNew;
5579}
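
/*
 * Usage sketch (illustrative only): fetch the low 128 bits of guest ymm1
 * read-only into a host SIMD register, use it and release it again.
 */
#if 0
    uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(1),
                                                                       kIemNativeGstSimdRegLdStSz_Low128,
                                                                       kIemNativeGstRegUse_ReadOnly);
    /* ... emit code that only reads idxSimdReg ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
#endif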
5580
5581#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5582
5583
5584
5585/*********************************************************************************************************************************
5586* Code emitters for flushing pending guest register writes and sanity checks *
5587*********************************************************************************************************************************/
5588
5589#ifdef VBOX_STRICT
5590/**
5591 * Does internal register allocator sanity checks.
5592 */
5593DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5594{
5595 /*
5596 * Iterate host registers building a guest shadowing set.
5597 */
5598 uint64_t bmGstRegShadows = 0;
5599 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5600 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5601 while (bmHstRegsWithGstShadow)
5602 {
5603 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5604 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5605 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5606
5607 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5608 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5609 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5610 bmGstRegShadows |= fThisGstRegShadows;
5611 while (fThisGstRegShadows)
5612 {
5613 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5614 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5615 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5616 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5617 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5618 }
5619 }
5620 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5621 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5622 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5623
5624 /*
5625 * Now the other way around, checking the guest to host index array.
5626 */
5627 bmHstRegsWithGstShadow = 0;
5628 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5629 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5630 while (bmGstRegShadows)
5631 {
5632 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5633 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5634 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5635
5636 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5637 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5638 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5639 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5640 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5641 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5642 }
5643 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5644 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5645 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5646}
5647#endif /* VBOX_STRICT */
5648
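/*
 * Illustrative note (values made up): the two-way mapping the sanity check above verifies.
 * If, say, host register 5 shadows the guest PC, all four views of that fact must agree:
 *
 *      pReNative->Core.bmHstRegsWithGstShadow                  - bit 5 set
 *      pReNative->Core.aHstRegs[5].fGstRegShadows              - bit kIemNativeGstReg_Pc set
 *      pReNative->Core.bmGstRegShadows                         - bit kIemNativeGstReg_Pc set
 *      pReNative->Core.aidxGstRegShadows[kIemNativeGstReg_Pc]  - equals 5
 *
 * Any divergence between these trips one of the assertions in iemNativeRegAssertSanity().
 */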
5649
5650/**
5651 * Flushes any delayed guest register writes.
5652 *
5653 * This must be called prior to calling CImpl functions and any helpers that use
5654 * the guest state (like raising exceptions) and such.
5655 *
5656 * The delayed writes currently handled here are RIP updates (when IEMNATIVE_WITH_DELAYED_PC_UPDATING
5657 * is defined) and dirty shadowed SIMD registers (when IEMNATIVE_WITH_SIMD_REG_ALLOCATOR is defined).
5658 */
5659DECL_HIDDEN_THROW(uint32_t)
5660iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5661{
5662#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5663 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5664 off = iemNativeEmitPcWriteback(pReNative, off);
5665#else
5666 RT_NOREF(pReNative, fGstShwExcept);
5667#endif
5668
5669#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5670 /** @todo r=bird: There must be a quicker way to check if anything needs
5671 * doing and then call the SIMD function to do the flushing. */
5672 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
5673 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5674 {
5675 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5676 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5677
5678 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5679 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5680
5681 if ( fFlushShadows
5682 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5683 {
5684 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5685
5686 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5687 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5688 }
5689 }
5690#else
5691 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5692#endif
5693
5694 return off;
5695}
5696
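#if 0
/* Illustrative only: the typical pattern in the emitters below - flush any delayed guest
   register writes before emitting code that reads CPUMCTX, e.g. a helper raising a fault.
   (iemNativeHlpExecRaiseGp0 is the helper used by the RaiseGP0 tail label further down.) */
    off = iemNativeRegFlushPendingWrites(pReNative, off);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
#endif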
5697
5698#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5699/**
5700 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5701 */
5702DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5703{
5704 Assert(pReNative->Core.offPc);
5705# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5706 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5707 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5708# endif
5709
5710# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5711 /* Allocate a temporary PC register. */
5712 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5713
5714 /* Perform the addition and store the result. */
5715 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5716 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5717
5718 /* Free but don't flush the PC register. */
5719 iemNativeRegFreeTmp(pReNative, idxPcReg);
5720# else
5721 /* Compare the shadow with the context value, they should match. */
5722 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5723 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5724# endif
5725
5726 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5727 pReNative->Core.offPc = 0;
5728 pReNative->Core.cInstrPcUpdateSkipped = 0;
5729
5730 return off;
5731}
5732#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5733
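#if 0
/* Illustrative only (cbInstr is a placeholder for the decoded instruction length): instead of
   storing RIP after every instruction, the recompiler just accumulates the advance and lets
   iemNativeEmitPcWritebackSlow() above fold it into cpum.GstCtx.rip when a flush is required. */
    pReNative->Core.offPc                 += cbInstr;
    pReNative->Core.cInstrPcUpdateSkipped += 1;
#endif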
5734
5735/*********************************************************************************************************************************
5736* Code Emitters (larger snippets) *
5737*********************************************************************************************************************************/
5738
5739/**
5740 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5741 * extending to 64-bit width.
5742 *
5743 * @returns New code buffer offset on success, UINT32_MAX on failure.
5744 * @param pReNative The recompiler state.
5745 * @param off The current code buffer position.
5746 * @param idxHstReg The host register to load the guest register value into.
5747 * @param enmGstReg The guest register to load.
5748 *
5749 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5750 * that is something the caller needs to do if applicable.
5751 */
5752DECL_HIDDEN_THROW(uint32_t)
5753iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5754{
5755 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5756 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5757
5758 switch (g_aGstShadowInfo[enmGstReg].cb)
5759 {
5760 case sizeof(uint64_t):
5761 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5762 case sizeof(uint32_t):
5763 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5764 case sizeof(uint16_t):
5765 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5766#if 0 /* not present in the table. */
5767 case sizeof(uint8_t):
5768 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5769#endif
5770 default:
5771 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5772 }
5773}
5774
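#if 0 /* Illustrative only: loading the guest PC into the fixed temporary register, as the
         strict-build value check below does; shadow bookkeeping is left to the caller. */
    off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
#endif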
5775
5776#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5777/**
5778 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5779 *
5780 * @returns New code buffer offset on success, UINT32_MAX on failure.
5781 * @param pReNative The recompiler state.
5782 * @param off The current code buffer position.
5783 * @param idxHstSimdReg The host register to load the guest register value into.
5784 * @param enmGstSimdReg The guest register to load.
5785 * @param enmLoadSz The load size of the register.
5786 *
5787 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5788 * that is something the caller needs to do if applicable.
5789 */
5790DECL_HIDDEN_THROW(uint32_t)
5791iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5792 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5793{
5794 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5795
5796 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5797 switch (enmLoadSz)
5798 {
5799 case kIemNativeGstSimdRegLdStSz_256:
5800 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5801 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5802 case kIemNativeGstSimdRegLdStSz_Low128:
5803 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5804 case kIemNativeGstSimdRegLdStSz_High128:
5805 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5806 default:
5807 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5808 }
5809}
5810#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5811
5812#ifdef VBOX_STRICT
5813
5814/**
5815 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5816 *
5817 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5818 * Trashes EFLAGS on AMD64.
5819 */
5820DECL_HIDDEN_THROW(uint32_t)
5821iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5822{
5823# ifdef RT_ARCH_AMD64
5824 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5825
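    /* The trick: rotating the 64-bit register by 32 swaps the two halves, so testing the (new)
       low 32 bits against 0xffffffff sets ZF exactly when the original top 32 bits were clear;
       the second rotate restores the register afterwards. */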
5826 /* rol reg64, 32 */
5827 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5828 pbCodeBuf[off++] = 0xc1;
5829 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5830 pbCodeBuf[off++] = 32;
5831
5832 /* test reg32, ffffffffh */
5833 if (idxReg >= 8)
5834 pbCodeBuf[off++] = X86_OP_REX_B;
5835 pbCodeBuf[off++] = 0xf7;
5836 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5837 pbCodeBuf[off++] = 0xff;
5838 pbCodeBuf[off++] = 0xff;
5839 pbCodeBuf[off++] = 0xff;
5840 pbCodeBuf[off++] = 0xff;
5841
5842 /* je/jz +1 */
5843 pbCodeBuf[off++] = 0x74;
5844 pbCodeBuf[off++] = 0x01;
5845
5846 /* int3 */
5847 pbCodeBuf[off++] = 0xcc;
5848
5849 /* rol reg64, 32 */
5850 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5851 pbCodeBuf[off++] = 0xc1;
5852 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5853 pbCodeBuf[off++] = 32;
5854
5855# elif defined(RT_ARCH_ARM64)
5856 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5857 /* lsr tmp0, reg64, #32 */
5858 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5859 /* cbz tmp0, +1 */
5860 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5861 /* brk #0x1100 */
5862 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5863
5864# else
5865# error "Port me!"
5866# endif
5867 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5868 return off;
5869}
5870
5871
5872/**
5873 * Emitting code that checks that the content of register @a idxReg is the same
5874 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5875 * instruction if that's not the case.
5876 *
5877 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5878 * Trashes EFLAGS on AMD64.
5879 */
5880DECL_HIDDEN_THROW(uint32_t)
5881iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5882{
5883# ifdef RT_ARCH_AMD64
5884 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5885
5886 /* cmp reg, [mem] */
5887 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5888 {
5889 if (idxReg >= 8)
5890 pbCodeBuf[off++] = X86_OP_REX_R;
5891 pbCodeBuf[off++] = 0x38;
5892 }
5893 else
5894 {
5895 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5896 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5897 else
5898 {
5899 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5900 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5901 else
5902 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5903 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5904 if (idxReg >= 8)
5905 pbCodeBuf[off++] = X86_OP_REX_R;
5906 }
5907 pbCodeBuf[off++] = 0x39;
5908 }
5909 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5910
5911 /* je/jz +1 */
5912 pbCodeBuf[off++] = 0x74;
5913 pbCodeBuf[off++] = 0x01;
5914
5915 /* int3 */
5916 pbCodeBuf[off++] = 0xcc;
5917
5918 /* For values smaller than the register size, we must check that the rest
5919 of the register is all zeros. */
5920 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5921 {
5922 /* test reg64, imm32 */
5923 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5924 pbCodeBuf[off++] = 0xf7;
5925 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5926 pbCodeBuf[off++] = 0;
5927 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5928 pbCodeBuf[off++] = 0xff;
5929 pbCodeBuf[off++] = 0xff;
5930
5931 /* je/jz +1 */
5932 pbCodeBuf[off++] = 0x74;
5933 pbCodeBuf[off++] = 0x01;
5934
5935 /* int3 */
5936 pbCodeBuf[off++] = 0xcc;
5937 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5938 }
5939 else
5940 {
5941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5942 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5943 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5944 }
5945
5946# elif defined(RT_ARCH_ARM64)
5947 /* mov TMP0, [gstreg] */
5948 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5949
5950 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5951 /* sub tmp0, tmp0, idxReg */
5952 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5953 /* cbz tmp0, +1 */
5954 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5955 /* brk #0x1000+enmGstReg */
5956 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5957 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5958
5959# else
5960# error "Port me!"
5961# endif
5962 return off;
5963}
5964
5965
5966# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5967/**
5968 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
5969 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5970 * instruction if that's not the case.
5971 *
5972 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5973 * Trashes EFLAGS on AMD64.
5974 */
5975DECL_HIDDEN_THROW(uint32_t)
5976iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5977 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5978{
5979 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5980 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5981 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5982 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5983 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5984 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5985 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5986 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5987 return off;
5988
5989# ifdef RT_ARCH_AMD64
5990 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
5991
5992 /* movdqa vectmp0, idxSimdReg */
5993 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5994
5995 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5996
5997 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5998 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5999 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
6000 pbCodeBuf[off++] = X86_OP_REX_R;
6001 pbCodeBuf[off++] = 0x0f;
6002 pbCodeBuf[off++] = 0x38;
6003 pbCodeBuf[off++] = 0x29;
6004 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6005
6006 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6007 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6008 pbCodeBuf[off++] = X86_OP_REX_W
6009 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6010 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6011 pbCodeBuf[off++] = 0x0f;
6012 pbCodeBuf[off++] = 0x3a;
6013 pbCodeBuf[off++] = 0x16;
6014 pbCodeBuf[off++] = 0xeb;
6015 pbCodeBuf[off++] = 0x00;
6016
6017 /* cmp tmp0, 0xffffffffffffffff. */
6018 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6019 pbCodeBuf[off++] = 0x83;
6020 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6021 pbCodeBuf[off++] = 0xff;
6022
6023 /* je/jz +1 */
6024 pbCodeBuf[off++] = 0x74;
6025 pbCodeBuf[off++] = 0x01;
6026
6027 /* int3 */
6028 pbCodeBuf[off++] = 0xcc;
6029
6030 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6031 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6032 pbCodeBuf[off++] = X86_OP_REX_W
6033 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6034 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6035 pbCodeBuf[off++] = 0x0f;
6036 pbCodeBuf[off++] = 0x3a;
6037 pbCodeBuf[off++] = 0x16;
6038 pbCodeBuf[off++] = 0xeb;
6039 pbCodeBuf[off++] = 0x01;
6040
6041 /* cmp tmp0, 0xffffffffffffffff. */
6042 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6043 pbCodeBuf[off++] = 0x83;
6044 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6045 pbCodeBuf[off++] = 0xff;
6046
6047 /* je/jz +1 */
6048 pbCodeBuf[off++] = 0x74;
6049 pbCodeBuf[off++] = 0x01;
6050
6051 /* int3 */
6052 pbCodeBuf[off++] = 0xcc;
6053
6054# elif defined(RT_ARCH_ARM64)
6055 /* mov vectmp0, [gstreg] */
6056 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6057
6058 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6059 {
6060 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6061 /* eor vectmp0, vectmp0, idxSimdReg */
6062 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6063 /* cnt vectmp0, vectmp0, #0 */
6064 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6065 /* umov tmp0, vectmp0.D[0] */
6066 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6067 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6068 /* cbz tmp0, +1 */
6069 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6070 /* brk #0x1000+enmGstReg */
6071 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6072 }
6073
6074 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6075 {
6076 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6077 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6078 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6079 /* cnt vectmp0 + 1, vectmp0 + 1, #0 */
6080 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6081 /* umov tmp0, (vectmp0 + 1).D[0] */
6082 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6083 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6084 /* cbz tmp0, +1 */
6085 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6086 /* brk #0x1000+enmGstReg */
6087 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6088 }
6089
6090# else
6091# error "Port me!"
6092# endif
6093
6094 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6095 return off;
6096}
6097# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6098
6099
6100/**
6101 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6102 * important bits.
6103 *
6104 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6105 * Trashes EFLAGS on AMD64.
6106 */
6107DECL_HIDDEN_THROW(uint32_t)
6108iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6109{
6110 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6111 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6112 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6113 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6114
6115 # ifdef RT_ARCH_AMD64
6116 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6117
6118 /* je/jz +1 */
6119 pbCodeBuf[off++] = 0x74;
6120 pbCodeBuf[off++] = 0x01;
6121
6122 /* int3 */
6123 pbCodeBuf[off++] = 0xcc;
6124
6125# elif defined(RT_ARCH_ARM64)
6126 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6127
6128 /* b.eq +1 */
6129 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6130 /* brk #0x2000 */
6131 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6132
6133# else
6134# error "Port me!"
6135# endif
6136 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6137
6138 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6139 return off;
6140}
6141
6142#endif /* VBOX_STRICT */
6143
6144
6145#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6146/**
6147 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6148 */
6149DECL_HIDDEN_THROW(uint32_t)
6150iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6151{
6152 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6153
6154 fEflNeeded &= X86_EFL_STATUS_BITS;
6155 if (fEflNeeded)
6156 {
6157# ifdef RT_ARCH_AMD64
6158 /* test dword [pVCpu + offVCpu], imm32 */
6159 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6160 if (fEflNeeded <= 0xff)
6161 {
6162 pCodeBuf[off++] = 0xf6;
6163 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6164 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6165 }
6166 else
6167 {
6168 pCodeBuf[off++] = 0xf7;
6169 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6170 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6171 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6172 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6173 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6174 }
6175 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6176
6177# else
6178 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6179 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6180 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6181# ifdef RT_ARCH_ARM64
6182 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6183 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6184# else
6185# error "Port me!"
6186# endif
6187 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6188# endif
6189 }
6190 return off;
6191}
6192#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6193
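/*
 * Illustrative only: this check is reached via the IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK
 * wrapper, e.g. before a threaded call or the epilog (see below):
 *
 *      IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
 *
 * The emitted code hits a breakpoint instruction if any of the requested status flags is still
 * marked as skipped in IEMCPU::fSkippingEFlags.
 */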
6194
6195/**
6196 * Emits a code for checking the return code of a call and rcPassUp, returning
6197 * from the code if either are non-zero.
6198 */
6199DECL_HIDDEN_THROW(uint32_t)
6200iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6201{
6202#ifdef RT_ARCH_AMD64
6203 /*
6204 * AMD64: eax = call status code.
6205 */
6206
6207 /* edx = rcPassUp */
6208 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6209# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6210 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6211# endif
6212
6213 /* edx = eax | rcPassUp */
6214 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6215 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6216 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6217 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6218
6219 /* Jump to non-zero status return path. */
6220 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6221
6222 /* done. */
6223
6224#elif RT_ARCH_ARM64
6225 /*
6226 * ARM64: w0 = call status code.
6227 */
6228# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6229 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6230# endif
6231 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6232
6233 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6234
6235 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6236
6237 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6238 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6239 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6240
6241#else
6242# error "port me"
6243#endif
6244 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6245 RT_NOREF_PV(idxInstr);
6246 return off;
6247}
6248
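#if 0 /* Illustrative only (pCallEntry is assumed to be in scope): the emit-call-then-check
         pattern used by iemNativeEmitCImplCall() and iemNativeEmitThreadedCall() below. */
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
    off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
#endif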
6249
6250/**
6251 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6252 * raising a \#GP(0) if it isn't.
6253 *
6254 * @returns New code buffer offset, UINT32_MAX on failure.
6255 * @param pReNative The native recompile state.
6256 * @param off The code buffer offset.
6257 * @param idxAddrReg The host register with the address to check.
6258 * @param idxInstr The current instruction.
6259 */
6260DECL_HIDDEN_THROW(uint32_t)
6261iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6262{
6263 /*
6264 * Make sure we don't have any outstanding guest register writes as we may
6265 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6266 */
6267 off = iemNativeRegFlushPendingWrites(pReNative, off);
6268
6269#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6270 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6271#else
6272 RT_NOREF(idxInstr);
6273#endif
6274
6275#ifdef RT_ARCH_AMD64
6276 /*
6277 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6278 * return raisexcpt();
6279 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6280 */
6281 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6282
6283 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6284 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6285 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6286 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6287 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6288
6289 iemNativeRegFreeTmp(pReNative, iTmpReg);
6290
6291#elif defined(RT_ARCH_ARM64)
6292 /*
6293 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6294 * return raisexcpt();
6295 * ----
6296 * mov x1, 0x800000000000
6297 * add x1, x0, x1
6298 * cmp xzr, x1, lsr 48
6299 * b.ne .Lraisexcpt
6300 */
6301 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6302
6303 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6304 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6305 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6306 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6307
6308 iemNativeRegFreeTmp(pReNative, iTmpReg);
6309
6310#else
6311# error "Port me"
6312#endif
6313 return off;
6314}
6315
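/*
 * Worked example for the AMD64 variant above (illustrative): the upper dword of a canonical
 * address lies in [0x00000000..0x00007fff] or [0xffff8000..0xffffffff].  Adding 0x8000 with
 * 32-bit wrap-around maps both ranges into [0x00000000..0x0000ffff] (e.g. 0xffff8000 + 0x8000
 * wraps to 0), so the following >> 16 yields zero.  A non-canonical value like 0x00008000
 * becomes 0x00010000 and survives the shift, taking the RaiseGp0 branch.
 */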
6316
6317/**
6318 * Emits code to check that the content of @a idxAddrReg is within the limit
6319 * of CS, raising a \#GP(0) if it isn't.
6320 *
6321 * @returns New code buffer offset; throws VBox status code on error.
6322 * @param pReNative The native recompile state.
6323 * @param off The code buffer offset.
6324 * @param idxAddrReg The host register (32-bit) with the address to
6325 * check.
6326 * @param idxInstr The current instruction.
6327 */
6328DECL_HIDDEN_THROW(uint32_t)
6329iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6330 uint8_t idxAddrReg, uint8_t idxInstr)
6331{
6332 /*
6333 * Make sure we don't have any outstanding guest register writes as we may
6334 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6335 */
6336 off = iemNativeRegFlushPendingWrites(pReNative, off);
6337
6338#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6339 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6340#else
6341 RT_NOREF(idxInstr);
6342#endif
6343
6344 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6345 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6346 kIemNativeGstRegUse_ReadOnly);
6347
6348 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6349 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6350
6351 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6352 return off;
6353}
6354
6355
6356/**
6357 * Emits a call to a CImpl function or something similar.
6358 */
6359DECL_HIDDEN_THROW(uint32_t)
6360iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6361 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6362{
6363 /* Writeback everything. */
6364 off = iemNativeRegFlushPendingWrites(pReNative, off);
6365
6366 /*
6367 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6368 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6369 */
6370 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6371 fGstShwFlush
6372 | RT_BIT_64(kIemNativeGstReg_Pc)
6373 | RT_BIT_64(kIemNativeGstReg_EFlags));
6374 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6375
6376 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6377
6378 /*
6379 * Load the parameters.
6380 */
6381#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6382 /* Special case: the hidden VBOXSTRICTRC return pointer takes the first argument register. */
6383 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6384 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6385 if (cAddParams > 0)
6386 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6387 if (cAddParams > 1)
6388 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6389 if (cAddParams > 2)
6390 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6391 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6392
6393#else
6394 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6395 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6396 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6397 if (cAddParams > 0)
6398 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6399 if (cAddParams > 1)
6400 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6401 if (cAddParams > 2)
6402# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6403 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6404# else
6405 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6406# endif
6407#endif
6408
6409 /*
6410 * Make the call.
6411 */
6412 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6413
6414#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6415 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6416#endif
6417
6418 /*
6419 * Check the status code.
6420 */
6421 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6422}
6423
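#if 0 /* Illustrative only; pfnHypotheticalCImpl, cbInstr and idxInstr are placeholders and not
         part of this file.  A defer-to-CImpl call for a 2 byte instruction w/o extra parameters: */
    off = iemNativeEmitCImplCall(pReNative, off, idxInstr, RT_BIT_64(kIemNativeGstReg_EFlags),
                                 (uintptr_t)pfnHypotheticalCImpl, 2 /*cbInstr*/, 0 /*cAddParams*/, 0, 0, 0);
#endif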
6424
6425/**
6426 * Emits a call to a threaded worker function.
6427 */
6428DECL_HIDDEN_THROW(uint32_t)
6429iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6430{
6431 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6432
6433 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6434 off = iemNativeRegFlushPendingWrites(pReNative, off);
6435
6436 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6437 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6438
6439#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6440 /* The threaded function may throw / long jmp, so set current instruction
6441 number if we're counting. */
6442 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6443#endif
6444
6445 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6446
6447#ifdef RT_ARCH_AMD64
6448 /* Load the parameters and emit the call. */
6449# ifdef RT_OS_WINDOWS
6450# ifndef VBOXSTRICTRC_STRICT_ENABLED
6451 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6452 if (cParams > 0)
6453 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6454 if (cParams > 1)
6455 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6456 if (cParams > 2)
6457 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6458# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6459 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6460 if (cParams > 0)
6461 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6462 if (cParams > 1)
6463 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6464 if (cParams > 2)
6465 {
6466 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6467 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6468 }
6469 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6470# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6471# else
6472 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6473 if (cParams > 0)
6474 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6475 if (cParams > 1)
6476 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6477 if (cParams > 2)
6478 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6479# endif
6480
6481 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6482
6483# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6484 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6485# endif
6486
6487#elif RT_ARCH_ARM64
6488 /*
6489 * ARM64:
6490 */
6491 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6492 if (cParams > 0)
6493 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6494 if (cParams > 1)
6495 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6496 if (cParams > 2)
6497 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6498
6499 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6500
6501#else
6502# error "port me"
6503#endif
6504
6505 /*
6506 * Check the status code.
6507 */
6508 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6509
6510 return off;
6511}
6512
6513#ifdef VBOX_WITH_STATISTICS
6514/**
6515 * Emits code to update the thread call statistics.
6516 */
6517DECL_INLINE_THROW(uint32_t)
6518iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6519{
6520 /*
6521 * Update threaded function stats.
6522 */
6523 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6524 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6525# if defined(RT_ARCH_ARM64)
6526 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6527 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6528 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6529 iemNativeRegFreeTmp(pReNative, idxTmp1);
6530 iemNativeRegFreeTmp(pReNative, idxTmp2);
6531# else
6532 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6533# endif
6534 return off;
6535}
6536#endif /* VBOX_WITH_STATISTICS */
6537
6538
6539/**
6540 * Emits the code at the CheckBranchMiss label.
6541 */
6542static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6543{
6544 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6545 if (idxLabel != UINT32_MAX)
6546 {
6547 iemNativeLabelDefine(pReNative, idxLabel, off);
6548
6549 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6550 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6551 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6552
6553 /* jump back to the return sequence. */
6554 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6555 }
6556 return off;
6557}
6558
6559
6560/**
6561 * Emits the code at the NeedCsLimChecking label.
6562 */
6563static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6564{
6565 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6566 if (idxLabel != UINT32_MAX)
6567 {
6568 iemNativeLabelDefine(pReNative, idxLabel, off);
6569
6570 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6571 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6572 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6573
6574 /* jump back to the return sequence. */
6575 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6576 }
6577 return off;
6578}
6579
6580
6581/**
6582 * Emits the code at the ObsoleteTb label.
6583 */
6584static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6585{
6586 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6587 if (idxLabel != UINT32_MAX)
6588 {
6589 iemNativeLabelDefine(pReNative, idxLabel, off);
6590
6591 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6592 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6593 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6594
6595 /* jump back to the return sequence. */
6596 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6597 }
6598 return off;
6599}
6600
6601
6602/**
6603 * Emits the code at the RaiseGP0 label.
6604 */
6605static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6606{
6607 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
6608 if (idxLabel != UINT32_MAX)
6609 {
6610 iemNativeLabelDefine(pReNative, idxLabel, off);
6611
6612 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
6613 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6614 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
6615
6616 /* jump back to the return sequence. */
6617 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6618 }
6619 return off;
6620}
6621
6622
6623/**
6624 * Emits the code at the RaiseNm label.
6625 */
6626static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6627{
6628 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
6629 if (idxLabel != UINT32_MAX)
6630 {
6631 iemNativeLabelDefine(pReNative, idxLabel, off);
6632
6633 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
6634 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6635 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
6636
6637 /* jump back to the return sequence. */
6638 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6639 }
6640 return off;
6641}
6642
6643
6644/**
6645 * Emits the code at the RaiseUd label.
6646 */
6647static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6648{
6649 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
6650 if (idxLabel != UINT32_MAX)
6651 {
6652 iemNativeLabelDefine(pReNative, idxLabel, off);
6653
6654 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
6655 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6656 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
6657
6658 /* jump back to the return sequence. */
6659 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6660 }
6661 return off;
6662}
6663
6664
6665/**
6666 * Emits the code at the RaiseMf label.
6667 */
6668static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6669{
6670 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
6671 if (idxLabel != UINT32_MAX)
6672 {
6673 iemNativeLabelDefine(pReNative, idxLabel, off);
6674
6675 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
6676 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6677 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
6678
6679 /* jump back to the return sequence. */
6680 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6681 }
6682 return off;
6683}
6684
6685
6686/**
6687 * Emits the code at the RaiseXf label.
6688 */
6689static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6690{
6691 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
6692 if (idxLabel != UINT32_MAX)
6693 {
6694 iemNativeLabelDefine(pReNative, idxLabel, off);
6695
6696 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
6697 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6698 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
6699
6700 /* jump back to the return sequence. */
6701 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6702 }
6703 return off;
6704}
6705
6706
6707/**
6708 * Emits the code at the RaiseDe label.
6709 */
6710static uint32_t iemNativeEmitRaiseDe(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6711{
6712 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseDe);
6713 if (idxLabel != UINT32_MAX)
6714 {
6715 iemNativeLabelDefine(pReNative, idxLabel, off);
6716
6717 /* iemNativeHlpExecRaiseDe(PVMCPUCC pVCpu) */
6718 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6719 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseDe);
6720
6721 /* jump back to the return sequence. */
6722 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6723 }
6724 return off;
6725}
6726
6727
6728/**
6729 * Emits the code at the ReturnWithFlags label (returns
6730 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6731 */
6732static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6733{
6734 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6735 if (idxLabel != UINT32_MAX)
6736 {
6737 iemNativeLabelDefine(pReNative, idxLabel, off);
6738
6739 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6740
6741 /* jump back to the return sequence. */
6742 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6743 }
6744 return off;
6745}
6746
6747
6748/**
6749 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6750 */
6751static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6752{
6753 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6754 if (idxLabel != UINT32_MAX)
6755 {
6756 iemNativeLabelDefine(pReNative, idxLabel, off);
6757
6758 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6759
6760 /* jump back to the return sequence. */
6761 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6762 }
6763 return off;
6764}
6765
6766
6767/**
6768 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6769 */
6770static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6771{
6772 /*
6773 * Generate the rc + rcPassUp fiddling code if needed.
6774 */
6775 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6776 if (idxLabel != UINT32_MAX)
6777 {
6778 iemNativeLabelDefine(pReNative, idxLabel, off);
6779
6780 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6781#ifdef RT_ARCH_AMD64
6782# ifdef RT_OS_WINDOWS
6783# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6784 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6785# endif
6786 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6787 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6788# else
6789 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6790 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6791# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6792 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6793# endif
6794# endif
6795# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6796 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6797# endif
6798
6799#else
6800 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6801 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6802 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6803#endif
6804
6805 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6806 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6807 }
6808 return off;
6809}
6810
6811
6812/**
6813 * Emits a standard epilog.
6814 */
6815static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6816{
6817 *pidxReturnLabel = UINT32_MAX;
6818
6819 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6820 off = iemNativeRegFlushPendingWrites(pReNative, off);
6821
6822 /*
6823 * Successful return, so clear the return register (eax, w0).
6824 */
6825 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6826
6827 /*
6828 * Define label for common return point.
6829 */
6830 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6831 *pidxReturnLabel = idxReturn;
6832
6833 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6834
6835 /*
6836 * Restore registers and return.
6837 */
6838#ifdef RT_ARCH_AMD64
6839 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6840
6841 /* Reposition esp at the r15 restore point. */
6842 pbCodeBuf[off++] = X86_OP_REX_W;
6843 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6844 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6845 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6846
6847 /* Pop non-volatile registers and return */
6848 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6849 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6850 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6851 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6852 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6853 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6854 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6855 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6856# ifdef RT_OS_WINDOWS
6857 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6858 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6859# endif
6860 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6861 pbCodeBuf[off++] = 0xc9; /* leave */
6862 pbCodeBuf[off++] = 0xc3; /* ret */
6863 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6864
6865#elif RT_ARCH_ARM64
6866 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6867
6868 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6869 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6870 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6871 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6872 IEMNATIVE_FRAME_VAR_SIZE / 8);
6873 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6874 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6875 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6876 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6877 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6878 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6879 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6880 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6881 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6882 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6883 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6884 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6885
6886 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6887 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6888 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6889 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6890
6891 /* retab / ret */
6892# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6893 if (1)
6894 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6895 else
6896# endif
6897 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6898
6899#else
6900# error "port me"
6901#endif
6902 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6903
6904 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6905}
6906
6907
6908/**
6909 * Emits a standard prolog.
6910 */
6911static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6912{
6913#ifdef RT_ARCH_AMD64
6914 /*
6915 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6916 * reserving 64 bytes for stack variables plus 4 non-register argument
6917 * slots. Fixed register assignment: xBX = pReNative;
6918 * slots. Fixed register assignment: xBX = pVCpu.
6919 * Since we always do the same register spilling, we can use the same
6920 * unwind description for all the code.
6921 */
6922 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6923 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6924 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6925 pbCodeBuf[off++] = 0x8b;
6926 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6927 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6928 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6929# ifdef RT_OS_WINDOWS
6930 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6931 pbCodeBuf[off++] = 0x8b;
6932 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6933 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6934 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6935# else
6936 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6937 pbCodeBuf[off++] = 0x8b;
6938 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6939# endif
6940 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6941 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6942 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6943 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6944 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6945 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6946 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6947 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6948
6949# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6950 /* Save the frame pointer. */
6951 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6952# endif
6953
6954 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6955 X86_GREG_xSP,
6956 IEMNATIVE_FRAME_ALIGN_SIZE
6957 + IEMNATIVE_FRAME_VAR_SIZE
6958 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6959 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6960 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6961 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6962 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6963
6964#elif RT_ARCH_ARM64
6965 /*
6966 * We set up a stack frame exactly like on x86, only we have to push the
6967 * return address our selves here. We save all non-volatile registers.
6968 * return address ourselves here. We save all non-volatile registers.
6969 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6970
6971 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been
6972 * unable to figure out where the BRK following AUTHB*+XPACB* stuff comes from in libunwind. It's
6973 * definitely the dwarf stepping code, but until it's found it's very tedious to figure out whether it's
6974 * in any way conditional, so just emitting this instruction now and hoping for the best... */
6975 /* pacibsp */
6976 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6977# endif
6978
6979 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6980 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6981 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6982 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6983 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6984 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6985 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6986 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6988 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6989 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6990 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6991 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6992 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6993 /* Save the BP and LR (ret address) registers at the top of the frame. */
6994 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6995 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6996 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6997 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6998 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6999 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7000
7001 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7002 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7003
7004 /* mov r28, r0 */
7005 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7006 /* mov r27, r1 */
7007 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7008
7009# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7010 /* Save the frame pointer. */
7011 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7012 ARMV8_A64_REG_X2);
7013# endif
7014
7015#else
7016# error "port me"
7017#endif
7018 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7019 return off;
7020}
7021
7022
7023/*********************************************************************************************************************************
7024* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7025*********************************************************************************************************************************/
7026
7027/**
7028 * Internal work that allocates a variable with kind set to
7029 * kIemNativeVarKind_Invalid and no current stack allocation.
7030 *
7031 * The kind will either be set by the caller or later when the variable is first
7032 * assigned a value.
7033 *
7034 * @returns Unpacked index.
7035 * @internal
7036 */
7037static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7038{
7039 Assert(cbType > 0 && cbType <= 64);
7040 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7041 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7042 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7043 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7044 pReNative->Core.aVars[idxVar].cbVar = cbType;
7045 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7046 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7047 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7048 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7049 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7050 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7051 pReNative->Core.aVars[idxVar].u.uValue = 0;
7052 return idxVar;
7053}
7054
7055
7056/**
7057 * Internal work that allocates an argument variable w/o setting enmKind.
7058 *
7059 * @returns Unpacked index.
7060 * @internal
7061 */
7062static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7063{
7064 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7065 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7066 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7067
7068 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7069 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7070 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7071 return idxVar;
7072}
7073
7074
7075/**
7076 * Gets the stack slot for a stack variable, allocating one if necessary.
7077 *
7078 * Calling this function implies that the stack slot will contain a valid
7079 * variable value. The caller deals with any register currently assigned to the
7080 * variable, typically by spilling it into the stack slot.
7081 *
7082 * @returns The stack slot number.
7083 * @param pReNative The recompiler state.
7084 * @param idxVar The variable.
7085 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7086 */
7087DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7088{
7089 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7090 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7091 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7092
7093 /* Already got a slot? */
7094 uint8_t const idxStackSlot = pVar->idxStackSlot;
7095 if (idxStackSlot != UINT8_MAX)
7096 {
7097 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7098 return idxStackSlot;
7099 }
7100
7101 /*
7102 * A single slot is easy to allocate.
7103 * Allocate them from the top end, closest to BP, to reduce the displacement.
7104 */
7105 if (pVar->cbVar <= sizeof(uint64_t))
7106 {
7107 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7108 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7109 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7110 pVar->idxStackSlot = (uint8_t)iSlot;
7111        Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7112 return (uint8_t)iSlot;
7113 }
7114
7115 /*
7116 * We need more than one stack slot.
7117 *
7118 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7119 */
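    /* Worked example (illustrative): a 32 byte variable needs four 8-byte slots, so
       fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 3 (4-slot alignment)
       and fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf (four slots), matching the 32 -> 3 entry above. */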
7120 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7121 Assert(pVar->cbVar <= 64);
7122 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7123 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7124 uint32_t bmStack = ~pReNative->Core.bmStack;
7125 while (bmStack != UINT32_MAX)
7126 {
7127/** @todo allocate from the top to reduce BP displacement. */
7128 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7129 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7130 if (!(iSlot & fBitAlignMask))
7131 {
7132 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7133 {
7134 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7135 pVar->idxStackSlot = (uint8_t)iSlot;
7136                Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7137 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7138 return (uint8_t)iSlot;
7139 }
7140 }
7141 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7142 }
7143 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7144}
7145
7146
7147/**
7148 * Changes the variable to a stack variable.
7149 *
7150 * Currently this is only possible to do the first time the variable is used;
7151 * switching later could be implemented but hasn't been done.
7152 *
7153 * @param pReNative The recompiler state.
7154 * @param idxVar The variable.
7155 * @throws VERR_IEM_VAR_IPE_2
7156 */
7157DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7158{
7159 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7160 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7161 if (pVar->enmKind != kIemNativeVarKind_Stack)
7162 {
7163 /* We could in theory transition from immediate to stack as well, but it
7164 would involve the caller doing work storing the value on the stack. So,
7165 till that's required we only allow transition from invalid. */
7166 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7167 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7168 pVar->enmKind = kIemNativeVarKind_Stack;
7169
7170 /* Note! We don't allocate a stack slot here, that's only done when a
7171 slot is actually needed to hold a variable value. */
7172 }
7173}
7174
7175
7176/**
7177 * Sets the variable to a constant (immediate) value.
7178 *
7179 * This does not require stack storage as we know the value and can always
7180 * reload it, unless of course it's referenced.
7181 *
7182 * @param pReNative The recompiler state.
7183 * @param idxVar The variable.
7184 * @param uValue The immediate value.
7185 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7186 */
7187DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7188{
7189 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7190 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7191 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7192 {
7193 /* Only simple transitions for now. */
7194 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7195 pVar->enmKind = kIemNativeVarKind_Immediate;
7196 }
7197 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7198
7199 pVar->u.uValue = uValue;
7200 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7201 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7202 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7203}
7204
7205
7206/**
7207 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7208 *
7209 * This does not require stack storage as we know the value and can always
7210 * reload it. Loading is postponed till needed.
7211 *
7212 * @param pReNative The recompiler state.
7213 * @param idxVar The variable. Unpacked.
7214 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7215 *
7216 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7217 * @internal
7218 */
7219static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7220{
7221 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7222 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7223
7224 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7225 {
7226 /* Only simple transitions for now. */
7227 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7228 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7229 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7230 }
7231 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7232
7233 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7234
7235 /* Update the other variable, ensure it's a stack variable. */
7236 /** @todo handle variables with const values... that'll go boom now. */
7237 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7238 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7239}
7240
7241
7242/**
7243 * Sets the variable to a reference (pointer) to a guest register reference.
7244 *
7245 * This does not require stack storage as we know the value and can always
7246 * reload it. Loading is postponed till needed.
7247 *
7248 * @param pReNative The recompiler state.
7249 * @param idxVar The variable.
7250 * @param   enmRegClass     The class of guest registers to reference.
7251 * @param idxReg The register within @a enmRegClass to reference.
7252 *
7253 * @throws VERR_IEM_VAR_IPE_2
7254 */
7255DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7256 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7257{
7258 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7259 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7260
7261 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7262 {
7263 /* Only simple transitions for now. */
7264 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7265 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7266 }
7267 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7268
7269 pVar->u.GstRegRef.enmClass = enmRegClass;
7270 pVar->u.GstRegRef.idx = idxReg;
7271}
7272
7273
7274DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7275{
7276 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7277}
7278
7279
7280DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7281{
7282 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7283
7284    /* Since we're using a generic uint64_t value type, we must truncate it if
7285       the variable is smaller, otherwise we may end up with a too large value when
7286       scaling up an imm8 w/ sign-extension.
7287
7288       This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7289       in the bios, bx=1) when running on arm, because clang expects 16-bit
7290       register parameters to have bits 16 and up set to zero.  Instead of
7291       setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7292       CF value in the result. */
7293 switch (cbType)
7294 {
7295 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7296 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7297 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7298 }
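    /* Example (the case described above): with cbType = sizeof(uint16_t) and a
       sign-extended immediate of UINT64_C(0xffffffffffffffff), uValue is masked
       down to 0xffff here, so the 16-bit argument ends up zero-extended in the
       host register as clang expects for 16-bit register parameters. */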
7299 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7300 return idxVar;
7301}
7302
7303
7304DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7305{
7306 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7307 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7308 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7309 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7310 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7311 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7312
7313 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7314 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7315 return idxArgVar;
7316}
7317
7318
7319DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7320{
7321 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7322 /* Don't set to stack now, leave that to the first use as for instance
7323 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7324 return idxVar;
7325}
7326
7327
7328DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7329{
7330 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7331
7332    /* Since we're using a generic uint64_t value type, we must truncate it if
7333       the variable is smaller, otherwise we may end up with a too large value when
7334       scaling up an imm8 w/ sign-extension. */
7335 switch (cbType)
7336 {
7337 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7338 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7339 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7340 }
7341 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7342 return idxVar;
7343}
7344
7345
7346/**
7347 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7348 * fixed till we call iemNativeVarRegisterRelease.
7349 *
7350 * @returns The host register number.
7351 * @param pReNative The recompiler state.
7352 * @param idxVar The variable.
7353 * @param poff Pointer to the instruction buffer offset.
7354 * In case a register needs to be freed up or the value
7355 * loaded off the stack.
7356 * @param fInitialized Set if the variable must already have been initialized.
7357 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7358 * the case.
7359 * @param idxRegPref Preferred register number or UINT8_MAX.
7360 */
7361DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7362 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7363{
7364 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7365 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7366 Assert(pVar->cbVar <= 8);
7367 Assert(!pVar->fRegAcquired);
7368
7369 uint8_t idxReg = pVar->idxReg;
7370 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7371 {
7372 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7373 && pVar->enmKind < kIemNativeVarKind_End);
7374 pVar->fRegAcquired = true;
7375 return idxReg;
7376 }
7377
7378 /*
7379 * If the kind of variable has not yet been set, default to 'stack'.
7380 */
7381 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7382 && pVar->enmKind < kIemNativeVarKind_End);
7383 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7384 iemNativeVarSetKindToStack(pReNative, idxVar);
7385
7386 /*
7387     * We have to allocate a register for the variable, even if it's a stack one,
7388     * as we don't know whether there are modifications being made to it before
7389     * it's finalized (todo: analyze and insert hints about that?).
7390     *
7391     * If we can, we try to get the correct register for argument variables.  This
7392     * assumes that most argument variables are fetched as close as possible to
7393     * the actual call, so that there aren't any interfering hidden calls
7394     * (memory accesses, etc.) in between.
7395     *
7396     * If we cannot, or it's a non-argument variable, we make sure no argument
7397     * registers that will be used by this MC block are allocated here, and we
7398     * always prefer non-volatile registers to avoid having to spill stuff for
7399     * internal calls.
7400 */
7401    /** @todo Detect too-early argument value fetches in the python script and
7402     * warn about hidden calls causing less optimal code to be generated. */
7403
7404 uint8_t const uArgNo = pVar->uArgNo;
7405 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7406 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7407 {
7408 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7409 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7410 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7411 }
7412 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7413 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7414 {
7415 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7416 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7417 & ~pReNative->Core.bmHstRegsWithGstShadow
7418 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7419 & fNotArgsMask;
7420 if (fRegs)
7421 {
7422 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7423 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7424 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7425 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7426 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7427 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7428 }
7429 else
7430 {
7431 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7432 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7433 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7434 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7435 }
7436 }
7437 else
7438 {
7439 idxReg = idxRegPref;
7440 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7441 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7442 }
7443 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7444 pVar->idxReg = idxReg;
7445
7446 /*
7447 * Load it off the stack if we've got a stack slot.
7448 */
7449 uint8_t const idxStackSlot = pVar->idxStackSlot;
7450 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7451 {
7452 Assert(fInitialized);
7453 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7454 switch (pVar->cbVar)
7455 {
7456 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7457 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7458 case 3: AssertFailed(); RT_FALL_THRU();
7459 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7460 default: AssertFailed(); RT_FALL_THRU();
7461 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7462 }
7463 }
7464 else
7465 {
7466 Assert(idxStackSlot == UINT8_MAX);
7467 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7468 }
7469 pVar->fRegAcquired = true;
7470 return idxReg;
7471}
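/* Typical usage pattern for the above (illustrative sketch only, not lifted from a
   specific caller; the release call is the one named in the documentation above):
        uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
        ...emit code that reads or updates idxVarReg...
        iemNativeVarRegisterRelease(pReNative, idxVar);
   The register stays assigned to the variable between the two calls. */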
7472
7473
7474/**
7475 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7476 * guest register.
7477 *
7478 * This function makes sure there is a register for it and sets it to be the
7479 * current shadow copy of @a enmGstReg.
7480 *
7481 * @returns The host register number.
7482 * @param pReNative The recompiler state.
7483 * @param idxVar The variable.
7484 * @param enmGstReg The guest register this variable will be written to
7485 * after this call.
7486 * @param poff Pointer to the instruction buffer offset.
7487 * In case a register needs to be freed up or if the
7488 * variable content needs to be loaded off the stack.
7489 *
7490 * @note  We DO NOT expect @a idxVar to be an argument variable,
7491 *        because this function is only used in the commit stage of an
7492 *        instruction.
7493 */
7494DECL_HIDDEN_THROW(uint8_t)
7495iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7496{
7497 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7498 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7499 Assert(!pVar->fRegAcquired);
7500 AssertMsgStmt( pVar->cbVar <= 8
7501 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7502 || pVar->enmKind == kIemNativeVarKind_Stack),
7503 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7504 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7505 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7506
7507 /*
7508 * This shouldn't ever be used for arguments, unless it's in a weird else
7509 * branch that doesn't do any calling and even then it's questionable.
7510 *
7511 * However, in case someone writes crazy wrong MC code and does register
7512 * updates before making calls, just use the regular register allocator to
7513 * ensure we get a register suitable for the intended argument number.
7514 */
7515 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7516
7517 /*
7518 * If there is already a register for the variable, we transfer/set the
7519 * guest shadow copy assignment to it.
7520 */
7521 uint8_t idxReg = pVar->idxReg;
7522 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7523 {
7524 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7525 {
7526 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7527 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7528 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7529 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7530 }
7531 else
7532 {
7533 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7534 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7535 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7536 }
7537 /** @todo figure this one out. We need some way of making sure the register isn't
7538 * modified after this point, just in case we start writing crappy MC code. */
7539 pVar->enmGstReg = enmGstReg;
7540 pVar->fRegAcquired = true;
7541 return idxReg;
7542 }
7543 Assert(pVar->uArgNo == UINT8_MAX);
7544
7545 /*
7546     * Because this is supposed to be the commit stage, we just tag along with the
7547 * temporary register allocator and upgrade it to a variable register.
7548 */
7549 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7550 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7551 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7552 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7553 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7554 pVar->idxReg = idxReg;
7555
7556 /*
7557 * Now we need to load the register value.
7558 */
7559 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7560 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7561 else
7562 {
7563 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7564 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7565 switch (pVar->cbVar)
7566 {
7567 case sizeof(uint64_t):
7568 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7569 break;
7570 case sizeof(uint32_t):
7571 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7572 break;
7573 case sizeof(uint16_t):
7574 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7575 break;
7576 case sizeof(uint8_t):
7577 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7578 break;
7579 default:
7580 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7581 }
7582 }
7583
7584 pVar->fRegAcquired = true;
7585 return idxReg;
7586}
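/* Illustrative commit-stage sketch (assumed usage, not taken from an actual caller):
        uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, enmGstReg, &off);
        ...emit the store of idxVarReg to the corresponding CPUMCTX field...
        iemNativeVarRegisterRelease(pReNative, idxValueVar);
   Since the acquired register becomes the current shadow of enmGstReg, the guest
   register shadowing stays consistent across the full write. */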
7587
7588
7589/**
7590 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7591 *
7592 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7593 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7594 * requirement of flushing anything in volatile host registers when making a
7595 * call.
7596 *
7597 * @returns New @a off value.
7598 * @param pReNative The recompiler state.
7599 * @param off The code buffer position.
7600 * @param fHstRegsNotToSave Set of registers not to save & restore.
7601 */
7602DECL_HIDDEN_THROW(uint32_t)
7603iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7604{
7605 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7606 if (fHstRegs)
7607 {
7608 do
7609 {
7610 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7611 fHstRegs &= ~RT_BIT_32(idxHstReg);
7612
7613 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7614 {
7615 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7616 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7617 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7618 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7619 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7620 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7621 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7622 {
7623 case kIemNativeVarKind_Stack:
7624 {
7625 /* Temporarily spill the variable register. */
7626 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7627 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7628 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7629 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7630 continue;
7631 }
7632
7633 case kIemNativeVarKind_Immediate:
7634 case kIemNativeVarKind_VarRef:
7635 case kIemNativeVarKind_GstRegRef:
7636 /* It is weird to have any of these loaded at this point. */
7637 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7638 continue;
7639
7640 case kIemNativeVarKind_End:
7641 case kIemNativeVarKind_Invalid:
7642 break;
7643 }
7644 AssertFailed();
7645 }
7646 else
7647 {
7648 /*
7649 * Allocate a temporary stack slot and spill the register to it.
7650 */
7651 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7652 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7653 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7654 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7655 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7656 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7657 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7658 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7659 }
7660 } while (fHstRegs);
7661 }
7662 return off;
7663}
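/* The save/restore helpers are meant to bracket the helper call, e.g. (illustrative):
        off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
        ...load the helper arguments and emit the actual call...
        off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
   optionally followed by iemNativeRegRestoreGuestShadowsInVolatileRegs() as noted
   in the documentation above. */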
7664
7665
7666/**
7667 * Emit code to restore volatile registers after a call to a helper.
7668 *
7669 * @returns New @a off value.
7670 * @param pReNative The recompiler state.
7671 * @param off The code buffer position.
7672 * @param fHstRegsNotToSave Set of registers not to save & restore.
7673 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7674 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7675 */
7676DECL_HIDDEN_THROW(uint32_t)
7677iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7678{
7679 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7680 if (fHstRegs)
7681 {
7682 do
7683 {
7684 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7685 fHstRegs &= ~RT_BIT_32(idxHstReg);
7686
7687 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7688 {
7689 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7690 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7691 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7692 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7693 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7694 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7695 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7696 {
7697 case kIemNativeVarKind_Stack:
7698 {
7699 /* Unspill the variable register. */
7700 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7701 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7702 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7703 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7704 continue;
7705 }
7706
7707 case kIemNativeVarKind_Immediate:
7708 case kIemNativeVarKind_VarRef:
7709 case kIemNativeVarKind_GstRegRef:
7710 /* It is weird to have any of these loaded at this point. */
7711 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7712 continue;
7713
7714 case kIemNativeVarKind_End:
7715 case kIemNativeVarKind_Invalid:
7716 break;
7717 }
7718 AssertFailed();
7719 }
7720 else
7721 {
7722 /*
7723 * Restore from temporary stack slot.
7724 */
7725 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7726 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7727 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7728 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7729
7730 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7731 }
7732 } while (fHstRegs);
7733 }
7734 return off;
7735}
7736
7737
7738/**
7739 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7740 *
7741 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7742 *
7743 * ASSUMES that @a idxVar is valid and unpacked.
7744 */
7745DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7746{
7747 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7748 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7749 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7750 {
7751 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7752 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7753 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7754 Assert(cSlots > 0);
7755 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7756 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7757 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7758 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7759 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7760 }
7761 else
7762 Assert(idxStackSlot == UINT8_MAX);
7763}
7764
7765
7766/**
7767 * Worker that frees a single variable.
7768 *
7769 * ASSUMES that @a idxVar is valid and unpacked.
7770 */
7771DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7772{
7773 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7774 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7775 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7776
7777 /* Free the host register first if any assigned. */
7778 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7779 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7780 {
7781 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7782 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7783 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7784 }
7785
7786 /* Free argument mapping. */
7787 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7788 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7789 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7790
7791 /* Free the stack slots. */
7792 iemNativeVarFreeStackSlots(pReNative, idxVar);
7793
7794 /* Free the actual variable. */
7795 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7796 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7797}
7798
7799
7800/**
7801 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7802 */
7803DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7804{
7805 while (bmVars != 0)
7806 {
7807 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7808 bmVars &= ~RT_BIT_32(idxVar);
7809
7810#if 1 /** @todo optimize by simplifying this later... */
7811 iemNativeVarFreeOneWorker(pReNative, idxVar);
7812#else
7813 /* Only need to free the host register, the rest is done as bulk updates below. */
7814 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7815 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7816 {
7817 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7818 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7819 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7820 }
7821#endif
7822 }
7823#if 0 /** @todo optimize by simplifying this later... */
7824 pReNative->Core.bmVars = 0;
7825 pReNative->Core.bmStack = 0;
7826 pReNative->Core.u64ArgVars = UINT64_MAX;
7827#endif
7828}
7829
7830
7831
7832/*********************************************************************************************************************************
7833* Emitters for IEM_MC_CALL_CIMPL_XXX *
7834*********************************************************************************************************************************/
7835
7836/**
7837 * Emits code to load a reference to the given guest register into @a idxGprDst.
7838 */
7839DECL_INLINE_THROW(uint32_t)
7840iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7841 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7842{
7843#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7844    /** @todo If we're ever going to allow referencing the RIP register we need to update the guest value here. */
7845#endif
7846
7847 /*
7848 * Get the offset relative to the CPUMCTX structure.
7849 */
7850 uint32_t offCpumCtx;
7851 switch (enmClass)
7852 {
7853 case kIemNativeGstRegRef_Gpr:
7854 Assert(idxRegInClass < 16);
7855 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7856 break;
7857
7858 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7859 Assert(idxRegInClass < 4);
7860 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7861 break;
7862
7863 case kIemNativeGstRegRef_EFlags:
7864 Assert(idxRegInClass == 0);
7865 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7866 break;
7867
7868 case kIemNativeGstRegRef_MxCsr:
7869 Assert(idxRegInClass == 0);
7870 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7871 break;
7872
7873 case kIemNativeGstRegRef_FpuReg:
7874 Assert(idxRegInClass < 8);
7875 AssertFailed(); /** @todo what kind of indexing? */
7876 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7877 break;
7878
7879 case kIemNativeGstRegRef_MReg:
7880 Assert(idxRegInClass < 8);
7881 AssertFailed(); /** @todo what kind of indexing? */
7882 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7883 break;
7884
7885 case kIemNativeGstRegRef_XReg:
7886 Assert(idxRegInClass < 16);
7887 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7888 break;
7889
7890 default:
7891 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7892 }
7893
7894 /*
7895 * Load the value into the destination register.
7896 */
7897#ifdef RT_ARCH_AMD64
7898 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7899
7900#elif defined(RT_ARCH_ARM64)
7901 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7902 Assert(offCpumCtx < 4096);
7903 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7904
7905#else
7906# error "Port me!"
7907#endif
7908
7909 return off;
7910}
7911
7912
7913/**
7914 * Common code for CIMPL and AIMPL calls.
7915 *
7916 * These are calls that use argument variables and such.  They should not be
7917 * confused with internal calls required to implement an MC operation,
7918 * like a TLB load and similar.
7919 *
7920 * Upon return all that is left to do is to load any hidden arguments and
7921 * perform the call. All argument variables are freed.
7922 *
7923 * @returns New code buffer offset; throws VBox status code on error.
7924 * @param pReNative The native recompile state.
7925 * @param off The code buffer offset.
7926 * @param   cArgs           The total number of arguments (includes hidden
7927 * count).
7928 * @param cHiddenArgs The number of hidden arguments. The hidden
7929 * arguments must not have any variable declared for
7930 * them, whereas all the regular arguments must
7931 * (tstIEMCheckMc ensures this).
7932 */
7933DECL_HIDDEN_THROW(uint32_t)
7934iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7935{
7936#ifdef VBOX_STRICT
7937 /*
7938 * Assert sanity.
7939 */
7940 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7941 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7942 for (unsigned i = 0; i < cHiddenArgs; i++)
7943 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7944 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7945 {
7946 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7947 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7948 }
7949 iemNativeRegAssertSanity(pReNative);
7950#endif
7951
7952 /* We don't know what the called function makes use of, so flush any pending register writes. */
7953 off = iemNativeRegFlushPendingWrites(pReNative, off);
7954
7955 /*
7956 * Before we do anything else, go over variables that are referenced and
7957 * make sure they are not in a register.
7958 */
7959 uint32_t bmVars = pReNative->Core.bmVars;
7960 if (bmVars)
7961 {
7962 do
7963 {
7964 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7965 bmVars &= ~RT_BIT_32(idxVar);
7966
7967 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7968 {
7969 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7970 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7971 {
7972 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7973 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7974 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7975 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7976 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7977
7978 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7979 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7980 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7981 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7982 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7983 }
7984 }
7985 } while (bmVars != 0);
7986#if 0 //def VBOX_STRICT
7987 iemNativeRegAssertSanity(pReNative);
7988#endif
7989 }
7990
7991 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7992
7993 /*
7994 * First, go over the host registers that will be used for arguments and make
7995 * sure they either hold the desired argument or are free.
7996 */
7997 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7998 {
7999 for (uint32_t i = 0; i < cRegArgs; i++)
8000 {
8001 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8002 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8003 {
8004 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8005 {
8006 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8007 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8008 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8009 Assert(pVar->idxReg == idxArgReg);
8010 uint8_t const uArgNo = pVar->uArgNo;
8011 if (uArgNo == i)
8012                    { /* perfect */ }
8013 /* The variable allocator logic should make sure this is impossible,
8014 except for when the return register is used as a parameter (ARM,
8015 but not x86). */
8016#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8017 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8018 {
8019# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8020# error "Implement this"
8021# endif
8022 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8023 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8024 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8025 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8026 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8027 }
8028#endif
8029 else
8030 {
8031 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8032
8033 if (pVar->enmKind == kIemNativeVarKind_Stack)
8034 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8035 else
8036 {
8037 /* just free it, can be reloaded if used again */
8038 pVar->idxReg = UINT8_MAX;
8039 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8040 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8041 }
8042 }
8043 }
8044 else
8045 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8046 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8047 }
8048 }
8049#if 0 //def VBOX_STRICT
8050 iemNativeRegAssertSanity(pReNative);
8051#endif
8052 }
8053
8054 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8055
8056#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8057 /*
8058 * If there are any stack arguments, make sure they are in their place as well.
8059 *
8060     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8061     * the caller) will be loading it later and it must be free (see the first loop).
8062 */
8063 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8064 {
8065 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8066 {
8067 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8068 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8069 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8070 {
8071 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8072 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8073 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8074 pVar->idxReg = UINT8_MAX;
8075 }
8076 else
8077 {
8078 /* Use ARG0 as temp for stuff we need registers for. */
8079 switch (pVar->enmKind)
8080 {
8081 case kIemNativeVarKind_Stack:
8082 {
8083 uint8_t const idxStackSlot = pVar->idxStackSlot;
8084 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8085 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8086 iemNativeStackCalcBpDisp(idxStackSlot));
8087 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8088 continue;
8089 }
8090
8091 case kIemNativeVarKind_Immediate:
8092 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8093 continue;
8094
8095 case kIemNativeVarKind_VarRef:
8096 {
8097 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8098 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8099 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8100 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8101 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8102 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8103 {
8104 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8105 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8106 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8107 }
8108 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8109 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8110 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8111 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8112 continue;
8113 }
8114
8115 case kIemNativeVarKind_GstRegRef:
8116 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8117 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8118 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8119 continue;
8120
8121 case kIemNativeVarKind_Invalid:
8122 case kIemNativeVarKind_End:
8123 break;
8124 }
8125 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8126 }
8127 }
8128# if 0 //def VBOX_STRICT
8129 iemNativeRegAssertSanity(pReNative);
8130# endif
8131 }
8132#else
8133 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8134#endif
8135
8136 /*
8137 * Make sure the argument variables are loaded into their respective registers.
8138 *
8139 * We can optimize this by ASSUMING that any register allocations are for
8140     * registers that have already been loaded and are ready.  The previous step
8141 * saw to that.
8142 */
8143 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8144 {
8145 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8146 {
8147 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8148 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8149 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8150 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8151 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8152 else
8153 {
8154 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8155 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8156 {
8157 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8158 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8159 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8160 | RT_BIT_32(idxArgReg);
8161 pVar->idxReg = idxArgReg;
8162 }
8163 else
8164 {
8165 /* Use ARG0 as temp for stuff we need registers for. */
8166 switch (pVar->enmKind)
8167 {
8168 case kIemNativeVarKind_Stack:
8169 {
8170 uint8_t const idxStackSlot = pVar->idxStackSlot;
8171 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8172 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8173 continue;
8174 }
8175
8176 case kIemNativeVarKind_Immediate:
8177 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8178 continue;
8179
8180 case kIemNativeVarKind_VarRef:
8181 {
8182 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8183 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8184 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8185 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8186 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8187 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8188 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8189 {
8190 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8191 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8192 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8193 }
8194 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8195 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8196 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8197 continue;
8198 }
8199
8200 case kIemNativeVarKind_GstRegRef:
8201 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8202 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8203 continue;
8204
8205 case kIemNativeVarKind_Invalid:
8206 case kIemNativeVarKind_End:
8207 break;
8208 }
8209 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8210 }
8211 }
8212 }
8213#if 0 //def VBOX_STRICT
8214 iemNativeRegAssertSanity(pReNative);
8215#endif
8216 }
8217#ifdef VBOX_STRICT
8218 else
8219 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8220 {
8221 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8222 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8223 }
8224#endif
8225
8226 /*
8227 * Free all argument variables (simplified).
8228 * Their lifetime always expires with the call they are for.
8229 */
8230 /** @todo Make the python script check that arguments aren't used after
8231 * IEM_MC_CALL_XXXX. */
8232    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8233     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8234     *        an argument value.  There is also some FPU stuff. */
8235 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8236 {
8237 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8238 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8239
8240 /* no need to free registers: */
8241 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8242 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8243 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8244 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8245 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8246 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8247
8248 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8249 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8250 iemNativeVarFreeStackSlots(pReNative, idxVar);
8251 }
8252 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8253
8254 /*
8255 * Flush volatile registers as we make the call.
8256 */
8257 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8258
8259 return off;
8260}
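/* Illustrative outline of how a caller typically wraps up after the above (the
   helper names and hidden argument constant are assumptions, not taken from this
   part of the file):
        off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
        ...load the hidden arguments (pVCpu, cbInstr, ...) into the first call registers...
        off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
 */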
8261
8262
8263
8264/*********************************************************************************************************************************
8265* TLB Lookup. *
8266*********************************************************************************************************************************/
8267
8268/**
8269 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8270 */
8271DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8272{
8273 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8274 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8275 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8276 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
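    /* The packing mirrors the decoding above, i.e. (illustrative):
           uSegAndSizeAndAccess = iSegReg | ((uint32_t)cbMem << 8) | (fAccess << 16); */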
8277
8278 /* Do the lookup manually. */
8279 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8280 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8281 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8282 if (RT_LIKELY(pTlbe->uTag == uTag))
8283 {
8284 /*
8285 * Check TLB page table level access flags.
8286 */
8287 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8288 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8289 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8290 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8291 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8292 | IEMTLBE_F_PG_UNASSIGNED
8293 | IEMTLBE_F_PT_NO_ACCESSED
8294 | fNoWriteNoDirty | fNoUser);
8295 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8296 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8297 {
8298 /*
8299 * Return the address.
8300 */
8301 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8302 if ((uintptr_t)pbAddr == uResult)
8303 return;
8304 RT_NOREF(cbMem);
8305 AssertFailed();
8306 }
8307 else
8308 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8309 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8310 }
8311 else
8312 AssertFailed();
8313 RT_BREAKPOINT();
8314}
8315
8316/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8317
8318
8319
8320/*********************************************************************************************************************************
8321* Recompiler Core. *
8322*********************************************************************************************************************************/
8323
8324/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8325static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8326{
8327 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8328 pDis->cbCachedInstr += cbMaxRead;
8329 RT_NOREF(cbMinRead);
8330 return VERR_NO_DATA;
8331}
8332
8333
8334DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8335{
8336 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8337 {
8338#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8339 ENTRY(fLocalForcedActions),
8340 ENTRY(iem.s.rcPassUp),
8341 ENTRY(iem.s.fExec),
8342 ENTRY(iem.s.pbInstrBuf),
8343 ENTRY(iem.s.uInstrBufPc),
8344 ENTRY(iem.s.GCPhysInstrBuf),
8345 ENTRY(iem.s.cbInstrBufTotal),
8346 ENTRY(iem.s.idxTbCurInstr),
8347#ifdef VBOX_WITH_STATISTICS
8348 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8349 ENTRY(iem.s.StatNativeTlbHitsForStore),
8350 ENTRY(iem.s.StatNativeTlbHitsForStack),
8351 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8352 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8353 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8354 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8355 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8356#endif
8357 ENTRY(iem.s.DataTlb.aEntries),
8358 ENTRY(iem.s.DataTlb.uTlbRevision),
8359 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8360 ENTRY(iem.s.DataTlb.cTlbHits),
8361 ENTRY(iem.s.CodeTlb.aEntries),
8362 ENTRY(iem.s.CodeTlb.uTlbRevision),
8363 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8364 ENTRY(iem.s.CodeTlb.cTlbHits),
8365 ENTRY(pVMR3),
8366 ENTRY(cpum.GstCtx.rax),
8367 ENTRY(cpum.GstCtx.ah),
8368 ENTRY(cpum.GstCtx.rcx),
8369 ENTRY(cpum.GstCtx.ch),
8370 ENTRY(cpum.GstCtx.rdx),
8371 ENTRY(cpum.GstCtx.dh),
8372 ENTRY(cpum.GstCtx.rbx),
8373 ENTRY(cpum.GstCtx.bh),
8374 ENTRY(cpum.GstCtx.rsp),
8375 ENTRY(cpum.GstCtx.rbp),
8376 ENTRY(cpum.GstCtx.rsi),
8377 ENTRY(cpum.GstCtx.rdi),
8378 ENTRY(cpum.GstCtx.r8),
8379 ENTRY(cpum.GstCtx.r9),
8380 ENTRY(cpum.GstCtx.r10),
8381 ENTRY(cpum.GstCtx.r11),
8382 ENTRY(cpum.GstCtx.r12),
8383 ENTRY(cpum.GstCtx.r13),
8384 ENTRY(cpum.GstCtx.r14),
8385 ENTRY(cpum.GstCtx.r15),
8386 ENTRY(cpum.GstCtx.es.Sel),
8387 ENTRY(cpum.GstCtx.es.u64Base),
8388 ENTRY(cpum.GstCtx.es.u32Limit),
8389 ENTRY(cpum.GstCtx.es.Attr),
8390 ENTRY(cpum.GstCtx.cs.Sel),
8391 ENTRY(cpum.GstCtx.cs.u64Base),
8392 ENTRY(cpum.GstCtx.cs.u32Limit),
8393 ENTRY(cpum.GstCtx.cs.Attr),
8394 ENTRY(cpum.GstCtx.ss.Sel),
8395 ENTRY(cpum.GstCtx.ss.u64Base),
8396 ENTRY(cpum.GstCtx.ss.u32Limit),
8397 ENTRY(cpum.GstCtx.ss.Attr),
8398 ENTRY(cpum.GstCtx.ds.Sel),
8399 ENTRY(cpum.GstCtx.ds.u64Base),
8400 ENTRY(cpum.GstCtx.ds.u32Limit),
8401 ENTRY(cpum.GstCtx.ds.Attr),
8402 ENTRY(cpum.GstCtx.fs.Sel),
8403 ENTRY(cpum.GstCtx.fs.u64Base),
8404 ENTRY(cpum.GstCtx.fs.u32Limit),
8405 ENTRY(cpum.GstCtx.fs.Attr),
8406 ENTRY(cpum.GstCtx.gs.Sel),
8407 ENTRY(cpum.GstCtx.gs.u64Base),
8408 ENTRY(cpum.GstCtx.gs.u32Limit),
8409 ENTRY(cpum.GstCtx.gs.Attr),
8410 ENTRY(cpum.GstCtx.rip),
8411 ENTRY(cpum.GstCtx.eflags),
8412 ENTRY(cpum.GstCtx.uRipInhibitInt),
8413#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8414 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8415 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8416 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8417 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8418 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8419 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8420 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8421 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8422 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8423 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8424 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8425 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8426 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8427 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8428 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8429 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8430 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8431 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8432 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8433 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8434 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8435 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8436 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8437 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8438 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8439 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8440 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8441 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8442 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8443 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8444 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8445 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8446#endif
8447#undef ENTRY
8448 };
8449#ifdef VBOX_STRICT
8450 static bool s_fOrderChecked = false;
8451 if (!s_fOrderChecked)
8452 {
8453 s_fOrderChecked = true;
8454 uint32_t offPrev = s_aMembers[0].off;
8455 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8456 {
8457 Assert(s_aMembers[i].off > offPrev);
8458 offPrev = s_aMembers[i].off;
8459 }
8460 }
8461#endif
8462
8463 /*
8464 * Binary lookup.
8465 */
8466 unsigned iStart = 0;
8467 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8468 for (;;)
8469 {
8470 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8471 uint32_t const offCur = s_aMembers[iCur].off;
8472 if (off < offCur)
8473 {
8474 if (iCur != iStart)
8475 iEnd = iCur;
8476 else
8477 break;
8478 }
8479 else if (off > offCur)
8480 {
8481 if (iCur + 1 < iEnd)
8482 iStart = iCur + 1;
8483 else
8484 break;
8485 }
8486 else
8487 return s_aMembers[iCur].pszName;
8488 }
8489#ifdef VBOX_WITH_STATISTICS
8490 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8491 return "iem.s.acThreadedFuncStats[iFn]";
8492#endif
8493 return NULL;
8494}
8495
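/*
 * Usage sketch (illustrative): resolves a VMCPU relative displacement taken from
 * a disassembled memory operand into a member name for annotating the output
 * below, e.g.:
 *
 *     const char *pszName = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
 *     Assert(!strcmp(pszName, "cpum.GstCtx.rip")); // NULL is returned for offsets not in the table.
 */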
8496
8497DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8498{
8499 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8500#if defined(RT_ARCH_AMD64)
8501 static const char * const a_apszMarkers[] =
8502 {
8503 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8504 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8505 };
8506#endif
8507
8508 char szDisBuf[512];
8509 DISSTATE Dis;
8510 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8511 uint32_t const cNative = pTb->Native.cInstructions;
8512 uint32_t offNative = 0;
8513#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8514 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8515#endif
8516 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8517 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8518 : DISCPUMODE_64BIT;
8519#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8520 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8521#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8522 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8523#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8524# error "Port me"
8525#else
8526 csh hDisasm = ~(size_t)0;
8527# if defined(RT_ARCH_AMD64)
8528 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8529# elif defined(RT_ARCH_ARM64)
8530 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8531# else
8532# error "Port me"
8533# endif
8534 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8535
8536 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8537 //Assert(rcCs == CS_ERR_OK);
8538#endif
8539
8540 /*
8541 * Print TB info.
8542 */
8543 pHlp->pfnPrintf(pHlp,
8544 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8545 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8546 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8547 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8548#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8549 if (pDbgInfo && pDbgInfo->cEntries > 1)
8550 {
8551 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8552
8553 /*
8554 * This disassembly is driven by the debug info which follows the native
8555 * code and indicates where the next guest instruction starts, where the
8556 * labels are, and other such things.
8557 */
8558 uint32_t idxThreadedCall = 0;
8559 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8560 uint8_t idxRange = UINT8_MAX;
8561 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8562 uint32_t offRange = 0;
8563 uint32_t offOpcodes = 0;
8564 uint32_t const cbOpcodes = pTb->cbOpcodes;
8565 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8566 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8567 uint32_t iDbgEntry = 1;
8568 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8569
8570 while (offNative < cNative)
8571 {
8572 /* If we're at or have passed the point where the next chunk of debug
8573 info starts, process it. */
8574 if (offDbgNativeNext <= offNative)
8575 {
8576 offDbgNativeNext = UINT32_MAX;
8577 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8578 {
8579 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8580 {
8581 case kIemTbDbgEntryType_GuestInstruction:
8582 {
8583 /* Did the exec flag change? */
8584 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8585 {
8586 pHlp->pfnPrintf(pHlp,
8587 " fExec change %#08x -> %#08x %s\n",
8588 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8589 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8590 szDisBuf, sizeof(szDisBuf)));
8591 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8592 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8593 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8594 : DISCPUMODE_64BIT;
8595 }
8596
8597 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8598 where the compilation was aborted before the opcode was recorded and the actual
8599 instruction was translated to a threaded call. This may happen when we run out
8600 of ranges, or when some complicated interrupts/FFs are found to be pending or
8601 similar. So, we just deal with it here rather than in the compiler code as it
8602 is a lot simpler to do here. */
8603 if ( idxRange == UINT8_MAX
8604 || idxRange >= cRanges
8605 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8606 {
8607 idxRange += 1;
8608 if (idxRange < cRanges)
8609 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8610 else
8611 continue;
8612 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8613 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8614 + (pTb->aRanges[idxRange].idxPhysPage == 0
8615 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8616 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8617 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8618 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8619 pTb->aRanges[idxRange].idxPhysPage);
8620 GCPhysPc += offRange;
8621 }
8622
8623 /* Disassemble the instruction. */
8624 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8625 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8626 uint32_t cbInstr = 1;
8627 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8628 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8629 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8630 if (RT_SUCCESS(rc))
8631 {
8632 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8633 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8634 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8635 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8636
8637 static unsigned const s_offMarker = 55;
8638 static char const s_szMarker[] = " ; <--- guest";
8639 if (cch < s_offMarker)
8640 {
8641 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8642 cch = s_offMarker;
8643 }
8644 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8645 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8646
8647 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8648 }
8649 else
8650 {
8651 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8652 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8653 cbInstr = 1;
8654 }
8655 GCPhysPc += cbInstr;
8656 offOpcodes += cbInstr;
8657 offRange += cbInstr;
8658 continue;
8659 }
8660
8661 case kIemTbDbgEntryType_ThreadedCall:
8662 pHlp->pfnPrintf(pHlp,
8663 " Call #%u to %s (%u args) - %s\n",
8664 idxThreadedCall,
8665 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8666 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8667 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8668 idxThreadedCall++;
8669 continue;
8670
8671 case kIemTbDbgEntryType_GuestRegShadowing:
8672 {
8673 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8674 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8675 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8676 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8677 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8678 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8679 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
8680 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8681 else
8682 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8683 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8684 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8685 continue;
8686 }
8687
8688#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8689 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8690 {
8691 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8692 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8693 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8694 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8695 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8696 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8697 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8698 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8699 else
8700 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8701 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8702 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8703 continue;
8704 }
8705#endif
8706
8707 case kIemTbDbgEntryType_Label:
8708 {
8709 const char *pszName = "what_the_fudge";
8710 const char *pszComment = "";
8711 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8712 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8713 {
8714 case kIemNativeLabelType_Return:
8715 pszName = "Return";
8716 break;
8717 case kIemNativeLabelType_ReturnBreak:
8718 pszName = "ReturnBreak";
8719 break;
8720 case kIemNativeLabelType_ReturnWithFlags:
8721 pszName = "ReturnWithFlags";
8722 break;
8723 case kIemNativeLabelType_NonZeroRetOrPassUp:
8724 pszName = "NonZeroRetOrPassUp";
8725 break;
8726 case kIemNativeLabelType_RaiseGp0:
8727 pszName = "RaiseGp0";
8728 break;
8729 case kIemNativeLabelType_RaiseNm:
8730 pszName = "RaiseNm";
8731 break;
8732 case kIemNativeLabelType_RaiseUd:
8733 pszName = "RaiseUd";
8734 break;
8735 case kIemNativeLabelType_RaiseMf:
8736 pszName = "RaiseMf";
8737 break;
8738 case kIemNativeLabelType_RaiseXf:
8739 pszName = "RaiseXf";
8740 break;
8741 case kIemNativeLabelType_RaiseDe:
8742 pszName = "RaiseDe";
8743 break;
8744 case kIemNativeLabelType_ObsoleteTb:
8745 pszName = "ObsoleteTb";
8746 break;
8747 case kIemNativeLabelType_NeedCsLimChecking:
8748 pszName = "NeedCsLimChecking";
8749 break;
8750 case kIemNativeLabelType_CheckBranchMiss:
8751 pszName = "CheckBranchMiss";
8752 break;
8753 case kIemNativeLabelType_If:
8754 pszName = "If";
8755 fNumbered = true;
8756 break;
8757 case kIemNativeLabelType_Else:
8758 pszName = "Else";
8759 fNumbered = true;
8760 pszComment = " ; regs state restored pre-if-block";
8761 break;
8762 case kIemNativeLabelType_Endif:
8763 pszName = "Endif";
8764 fNumbered = true;
8765 break;
8766 case kIemNativeLabelType_CheckIrq:
8767 pszName = "CheckIrq_CheckVM";
8768 fNumbered = true;
8769 break;
8770 case kIemNativeLabelType_TlbLookup:
8771 pszName = "TlbLookup";
8772 fNumbered = true;
8773 break;
8774 case kIemNativeLabelType_TlbMiss:
8775 pszName = "TlbMiss";
8776 fNumbered = true;
8777 break;
8778 case kIemNativeLabelType_TlbDone:
8779 pszName = "TlbDone";
8780 fNumbered = true;
8781 break;
8782 case kIemNativeLabelType_Invalid:
8783 case kIemNativeLabelType_End:
8784 break;
8785 }
8786 if (fNumbered)
8787 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8788 else
8789 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8790 continue;
8791 }
8792
8793 case kIemTbDbgEntryType_NativeOffset:
8794 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8795 Assert(offDbgNativeNext > offNative);
8796 break;
8797
8798#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8799 case kIemTbDbgEntryType_DelayedPcUpdate:
8800 pHlp->pfnPrintf(pHlp,
8801 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8802 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8803 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8804 continue;
8805#endif
8806
8807 default:
8808 AssertFailed();
8809 }
8810 iDbgEntry++;
8811 break;
8812 }
8813 }
8814
8815 /*
8816 * Disassemble the next native instruction.
8817 */
8818 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8819# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8820 uint32_t cbInstr = sizeof(paNative[0]);
8821 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8822 if (RT_SUCCESS(rc))
8823 {
8824# if defined(RT_ARCH_AMD64)
8825 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8826 {
8827 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8828 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8829 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8830 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8831 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8832 uInfo & 0x8000 ? "recompiled" : "todo");
8833 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8834 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8835 else
8836 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8837 }
8838 else
8839# endif
8840 {
8841 const char *pszAnnotation = NULL;
8842# ifdef RT_ARCH_AMD64
8843 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8844 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8845 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8846 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8847 PCDISOPPARAM pMemOp;
8848 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8849 pMemOp = &Dis.Param1;
8850 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8851 pMemOp = &Dis.Param2;
8852 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8853 pMemOp = &Dis.Param3;
8854 else
8855 pMemOp = NULL;
8856 if ( pMemOp
8857 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8858 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8859 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8860 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8861
8862# elif defined(RT_ARCH_ARM64)
8863 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8864 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8865 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8866# else
8867# error "Port me"
8868# endif
8869 if (pszAnnotation)
8870 {
8871 static unsigned const s_offAnnotation = 55;
8872 size_t const cchAnnotation = strlen(pszAnnotation);
8873 size_t cchDis = strlen(szDisBuf);
8874 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8875 {
8876 if (cchDis < s_offAnnotation)
8877 {
8878 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8879 cchDis = s_offAnnotation;
8880 }
8881 szDisBuf[cchDis++] = ' ';
8882 szDisBuf[cchDis++] = ';';
8883 szDisBuf[cchDis++] = ' ';
8884 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8885 }
8886 }
8887 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8888 }
8889 }
8890 else
8891 {
8892# if defined(RT_ARCH_AMD64)
8893 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8894 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8895# elif defined(RT_ARCH_ARM64)
8896 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8897# else
8898# error "Port me"
8899# endif
8900 cbInstr = sizeof(paNative[0]);
8901 }
8902 offNative += cbInstr / sizeof(paNative[0]);
8903
8904# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8905 cs_insn *pInstr;
8906 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8907 (uintptr_t)pNativeCur, 1, &pInstr);
8908 if (cInstrs > 0)
8909 {
8910 Assert(cInstrs == 1);
8911 const char *pszAnnotation = NULL;
8912# if defined(RT_ARCH_ARM64)
8913 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8914 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8915 {
8916 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8917 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8918 char *psz = strchr(pInstr->op_str, '[');
8919 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8920 {
8921 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8922 int32_t off = -1;
8923 psz += 4;
8924 if (*psz == ']')
8925 off = 0;
8926 else if (*psz == ',')
8927 {
8928 psz = RTStrStripL(psz + 1);
8929 if (*psz == '#')
8930 off = RTStrToInt32(&psz[1]);
8931 /** @todo deal with index registers and LSL as well... */
8932 }
8933 if (off >= 0)
8934 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8935 }
8936 }
8937# endif
8938
8939 size_t const cchOp = strlen(pInstr->op_str);
8940# if defined(RT_ARCH_AMD64)
8941 if (pszAnnotation)
8942 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8943 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8944 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8945 else
8946 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8947 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8948
8949# else
8950 if (pszAnnotation)
8951 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8952 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8953 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8954 else
8955 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8956 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8957# endif
8958 offNative += pInstr->size / sizeof(*pNativeCur);
8959 cs_free(pInstr, cInstrs);
8960 }
8961 else
8962 {
8963# if defined(RT_ARCH_AMD64)
8964 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8965 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8966# else
8967 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8968# endif
8969 offNative++;
8970 }
8971# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8972 }
8973 }
8974 else
8975#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8976 {
8977 /*
8978 * No debug info, just disassemble the x86 code and then the native code.
8979 *
8980 * First the guest code:
8981 */
8982 for (unsigned i = 0; i < pTb->cRanges; i++)
8983 {
8984 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8985 + (pTb->aRanges[i].idxPhysPage == 0
8986 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8987 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8988 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8989 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8990 unsigned off = pTb->aRanges[i].offOpcodes;
8991 /** @todo this ain't working when crossing pages! */
8992 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8993 while (off < cbOpcodes)
8994 {
8995 uint32_t cbInstr = 1;
8996 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8997 &pTb->pabOpcodes[off], cbOpcodes - off,
8998 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8999 if (RT_SUCCESS(rc))
9000 {
9001 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9002 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9003 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9004 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9005 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9006 GCPhysPc += cbInstr;
9007 off += cbInstr;
9008 }
9009 else
9010 {
9011 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9012 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9013 break;
9014 }
9015 }
9016 }
9017
9018 /*
9019 * Then the native code:
9020 */
9021 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9022 while (offNative < cNative)
9023 {
9024 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9025# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9026 uint32_t cbInstr = sizeof(paNative[0]);
9027 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9028 if (RT_SUCCESS(rc))
9029 {
9030# if defined(RT_ARCH_AMD64)
9031 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9032 {
9033 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9034 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9035 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9036 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9037 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9038 uInfo & 0x8000 ? "recompiled" : "todo");
9039 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9040 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9041 else
9042 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9043 }
9044 else
9045# endif
9046 {
9047# ifdef RT_ARCH_AMD64
9048 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9049 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9050 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9051 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9052# elif defined(RT_ARCH_ARM64)
9053 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9054 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9055 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9056# else
9057# error "Port me"
9058# endif
9059 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9060 }
9061 }
9062 else
9063 {
9064# if defined(RT_ARCH_AMD64)
9065 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9066 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9067# else
9068 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9069# endif
9070 cbInstr = sizeof(paNative[0]);
9071 }
9072 offNative += cbInstr / sizeof(paNative[0]);
9073
9074# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9075 cs_insn *pInstr;
9076 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9077 (uintptr_t)pNativeCur, 1, &pInstr);
9078 if (cInstrs > 0)
9079 {
9080 Assert(cInstrs == 1);
9081# if defined(RT_ARCH_AMD64)
9082 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9083 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9084# else
9085 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9086 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9087# endif
9088 offNative += pInstr->size / sizeof(*pNativeCur);
9089 cs_free(pInstr, cInstrs);
9090 }
9091 else
9092 {
9093# if defined(RT_ARCH_AMD64)
9094 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9095 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9096# else
9097 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9098# endif
9099 offNative++;
9100 }
9101# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9102 }
9103 }
9104
9105#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9106 /* Cleanup. */
9107 cs_close(&hDisasm);
9108#endif
9109}
9110
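/*
 * Caller-side sketch (illustrative): iemNativeRecompile below always returns pTb,
 * so success is detected via the TB type flag it sets near the end:
 *
 *     pTb = iemNativeRecompile(pVCpu, pTb);
 *     if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
 *         iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp()); // recompiled - optionally dump it
 *     // otherwise pTb is still the threaded TB and recompilation was aborted.
 */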
9111
9112/**
9113 * Recompiles the given threaded TB into a native one.
9114 *
9115 * In case of failure the translation block will be returned as-is.
9116 *
9117 * @returns pTb.
9118 * @param pVCpu The cross context virtual CPU structure of the calling
9119 * thread.
9120 * @param pTb The threaded translation to recompile to native.
9121 */
9122DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9123{
9124 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9125
9126 /*
9127 * The first time thru, we allocate the recompiler state; the other times
9128 * we just need to reset it before using it again.
9129 */
9130 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9131 if (RT_LIKELY(pReNative))
9132 iemNativeReInit(pReNative, pTb);
9133 else
9134 {
9135 pReNative = iemNativeInit(pVCpu, pTb);
9136 AssertReturn(pReNative, pTb);
9137 }
9138
9139#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9140 /*
9141 * First do liveness analysis. This is done backwards.
9142 */
9143 {
9144 uint32_t idxCall = pTb->Thrd.cCalls;
9145 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9146 { /* likely */ }
9147 else
9148 {
9149 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9150 while (idxCall > cAlloc)
9151 cAlloc *= 2;
9152 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9153 AssertReturn(pvNew, pTb);
9154 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9155 pReNative->cLivenessEntriesAlloc = cAlloc;
9156 }
9157 AssertReturn(idxCall > 0, pTb);
9158 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9159
9160 /* The initial (final) entry. */
9161 idxCall--;
9162 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9163
9164 /* Loop backwards thru the calls and fill in the other entries. */
9165 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9166 while (idxCall > 0)
9167 {
9168 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9169 if (pfnLiveness)
9170 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9171 else
9172 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9173 pCallEntry--;
9174 idxCall--;
9175 }
9176
9177# ifdef VBOX_WITH_STATISTICS
9178 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9179 to 'clobbered' rather than 'input'. */
9180 /** @todo */
9181# endif
9182 }
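 /* Illustration of the backward pass above: with three calls, entry [2] is
 initialised as all-unused, entry [1] is derived from entry [2] using call #2's
 liveness function (or the generic xcpt/call init), and entry [0] from entry [1]
 using call #1's. */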
9183#endif
9184
9185 /*
9186 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9187 * for aborting if an error happens.
9188 */
9189 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9190#ifdef LOG_ENABLED
9191 uint32_t const cCallsOrg = cCallsLeft;
9192#endif
9193 uint32_t off = 0;
9194 int rc = VINF_SUCCESS;
9195 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9196 {
9197 /*
9198 * Emit prolog code (fixed).
9199 */
9200 off = iemNativeEmitProlog(pReNative, off);
9201
9202 /*
9203 * Convert the calls to native code.
9204 */
9205#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9206 int32_t iGstInstr = -1;
9207#endif
9208#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9209 uint32_t cThreadedCalls = 0;
9210 uint32_t cRecompiledCalls = 0;
9211#endif
9212#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9213 uint32_t idxCurCall = 0;
9214#endif
9215 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9216 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9217 while (cCallsLeft-- > 0)
9218 {
9219 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9220#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9221 pReNative->idxCurCall = idxCurCall;
9222#endif
9223
9224 /*
9225 * Debug info, assembly markup and statistics.
9226 */
9227#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9228 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9229 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9230#endif
9231#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9232 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9233 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9234 {
9235 if (iGstInstr < (int32_t)pTb->cInstructions)
9236 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9237 else
9238 Assert(iGstInstr == pTb->cInstructions);
9239 iGstInstr = pCallEntry->idxInstr;
9240 }
9241 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9242#endif
9243#if defined(VBOX_STRICT)
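 /* The marker payload is decoded by iemNativeDisassembleTb above: bits 0 thru 14
 hold the call index, bit 15 the recompiled flag and bits 16 thru 31 the
 threaded function number. */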
9244 off = iemNativeEmitMarker(pReNative, off,
9245 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9246#endif
9247#if defined(VBOX_STRICT)
9248 iemNativeRegAssertSanity(pReNative);
9249#endif
9250#ifdef VBOX_WITH_STATISTICS
9251 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9252#endif
9253
9254 /*
9255 * Actual work.
9256 */
9257 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9258 pfnRecom ? "(recompiled)" : "(todo)"));
9259 if (pfnRecom) /** @todo stats on this. */
9260 {
9261 off = pfnRecom(pReNative, off, pCallEntry);
9262 STAM_REL_STATS({cRecompiledCalls++;});
9263 }
9264 else
9265 {
9266 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9267 STAM_REL_STATS({cThreadedCalls++;});
9268 }
9269 Assert(off <= pReNative->cInstrBufAlloc);
9270 Assert(pReNative->cCondDepth == 0);
9271
9272#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9273 if (LogIs2Enabled())
9274 {
9275 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9276# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9277 static const char s_achState[] = "CUXI";
9278# else
9279 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9280# endif
9281
9282 char szGpr[17];
9283 for (unsigned i = 0; i < 16; i++)
9284 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9285 szGpr[16] = '\0';
9286
9287 char szSegBase[X86_SREG_COUNT + 1];
9288 char szSegLimit[X86_SREG_COUNT + 1];
9289 char szSegAttrib[X86_SREG_COUNT + 1];
9290 char szSegSel[X86_SREG_COUNT + 1];
9291 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9292 {
9293 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9294 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9295 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9296 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9297 }
9298 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9299 = szSegSel[X86_SREG_COUNT] = '\0';
9300
9301 char szEFlags[8];
9302 for (unsigned i = 0; i < 7; i++)
9303 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9304 szEFlags[7] = '\0';
9305
9306 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9307 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9308 }
9309#endif
9310
9311 /*
9312 * Advance.
9313 */
9314 pCallEntry++;
9315#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9316 idxCurCall++;
9317#endif
9318 }
9319
9320 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9321 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9322 if (!cThreadedCalls)
9323 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9324
9325 /*
9326 * Emit the epilog code.
9327 */
9328 uint32_t idxReturnLabel;
9329 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9330
9331 /*
9332 * Generate special jump labels.
9333 */
9334 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9335 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9336 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9337 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9338 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
9339 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
9340 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
9341 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
9342 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
9343 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
9344 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
9345 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
9346 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
9347 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
9348 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseDe))
9349 off = iemNativeEmitRaiseDe(pReNative, off, idxReturnLabel);
9350 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
9351 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
9352 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
9353 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
9354 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
9355 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
9356 }
9357 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9358 {
9359 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9360 return pTb;
9361 }
9362 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9363 Assert(off <= pReNative->cInstrBufAlloc);
9364
9365 /*
9366 * Make sure all labels have been defined.
9367 */
9368 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9369#ifdef VBOX_STRICT
9370 uint32_t const cLabels = pReNative->cLabels;
9371 for (uint32_t i = 0; i < cLabels; i++)
9372 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9373#endif
9374
9375 /*
9376 * Allocate executable memory, copy over the code we've generated.
9377 */
9378 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9379 if (pTbAllocator->pDelayedFreeHead)
9380 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9381
9382 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9383 AssertReturn(paFinalInstrBuf, pTb);
9384 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9385
9386 /*
9387 * Apply fixups.
9388 */
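 /* Worked example (illustrative numbers): a 32-bit relative fixup recorded at
 native offset 0x100 with offAddend -4, targeting a label defined at offset
 0x120, stores 0x120 - 0x100 + (-4) = 0x1c in the Rel32 case below. */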
9389 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9390 uint32_t const cFixups = pReNative->cFixups;
9391 for (uint32_t i = 0; i < cFixups; i++)
9392 {
9393 Assert(paFixups[i].off < off);
9394 Assert(paFixups[i].idxLabel < cLabels);
9395 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9396 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9397 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9398 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9399 switch (paFixups[i].enmType)
9400 {
9401#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9402 case kIemNativeFixupType_Rel32:
9403 Assert(paFixups[i].off + 4 <= off);
9404 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9405 continue;
9406
9407#elif defined(RT_ARCH_ARM64)
9408 case kIemNativeFixupType_RelImm26At0:
9409 {
9410 Assert(paFixups[i].off < off);
9411 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9412 Assert(offDisp >= -262144 && offDisp < 262144);
9413 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9414 continue;
9415 }
9416
9417 case kIemNativeFixupType_RelImm19At5:
9418 {
9419 Assert(paFixups[i].off < off);
9420 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9421 Assert(offDisp >= -262144 && offDisp < 262144);
9422 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9423 continue;
9424 }
9425
9426 case kIemNativeFixupType_RelImm14At5:
9427 {
9428 Assert(paFixups[i].off < off);
9429 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9430 Assert(offDisp >= -8192 && offDisp < 8192);
9431 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9432 continue;
9433 }
9434
9435#endif
9436 case kIemNativeFixupType_Invalid:
9437 case kIemNativeFixupType_End:
9438 break;
9439 }
9440 AssertFailed();
9441 }
9442
9443 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9444 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9445
9446 /*
9447 * Convert the translation block.
9448 */
9449 RTMemFree(pTb->Thrd.paCalls);
9450 pTb->Native.paInstructions = paFinalInstrBuf;
9451 pTb->Native.cInstructions = off;
9452 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9453#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9454 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9455 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9456#endif
9457
9458 Assert(pTbAllocator->cThreadedTbs > 0);
9459 pTbAllocator->cThreadedTbs -= 1;
9460 pTbAllocator->cNativeTbs += 1;
9461 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9462
9463#ifdef LOG_ENABLED
9464 /*
9465 * Disassemble to the log if enabled.
9466 */
9467 if (LogIs3Enabled())
9468 {
9469 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9470 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9471# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9472 RTLogFlush(NULL);
9473# endif
9474 }
9475#endif
9476 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9477
9478 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9479 return pTb;
9480}
9481