VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp @ 103825

Last change on this file since 103825 was 103811, checked in by vboxsync, 9 months ago

VMM/IEM: Fix the SIMD register check code on amd64, bugref:10614

1/* $Id: IEMAllN8veRecompiler.cpp 103811 2024-03-12 21:45:22Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
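/* Illustration (not from the original source): with 128 byte units a request of
 * 300 bytes rounds up to cReqUnits = (300 + 128 - 1) >> 7 = 3 units, i.e. 384
 * bytes, which is exactly the computation iemExecMemAllocatorAllocInChunk does
 * further down. */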
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Critical section serializing GDB JIT registrations. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
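/* How the registration dance works (descriptive, see the code near the end of
 * iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk): a new GDBJITCODEENTRY
 * is linked into the descriptor list, __jit_debug_descriptor.pRelevant and
 * .enmAction are set, and __jit_debug_register_code() is called.  GDB keeps a
 * breakpoint on that empty function and re-reads the descriptor when it hits. */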
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity, they are allocated as one contiguous
339 * block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * request memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
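/* Worked example (illustrative): say cReqUnits = 3 and the low bits of the first
 * bitmap word are 0b...11100111 (bits 3 and 4 clear).  ASMBitFirstClear returns
 * bit 3, the inner loop stops at idxAddBit = 2 because bit 5 is set, and the
 * outer loop resumes with ASMBitNextClear searching from bit 5 onwards until a
 * run of three clear bits is found or cToScan is exhausted. */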
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
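/* Size adjustment example (illustrative): with the RTHeapSimple variant and its
 * 32 byte block header, a 100 byte request becomes RT_ALIGN_32(100 + 32, 64) - 32
 * = 160 bytes, so block plus header end exactly on a 64 byte line; with the
 * alternative sub-allocator the same request simply rounds up to one 128 byte
 * unit. */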
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
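/* Typical call sequence on darwin (illustrative only, using the functions in
 * this file):
 *   void *pv = iemExecMemAllocatorAlloc(pVCpu, cb);    - pages are RW here
 *   ... emit the native code into pv ...
 *   iemExecMemAllocatorReadyForUse(pVCpu, pv, cb);     - flip to RX, flush icache
 * On hosts without the W^X restriction the protection changes are no-ops. */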
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
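/* Note (descriptive): the alternative sub-allocator keeps no per-block header in
 * the chunk, so callers must pass the exact same cb they allocated with; the
 * bitmap asserts above are the only sanity check for a mismatched size. */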
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! We use a frame register approach here both because we have one
695 * and, mainly, because the UWOP_ALLOC_LARGE argument values would be
696 * a pain to write initializers for. On the positive side, we're
697 * impervious to changes in the stack variable area and can deal with
698 * dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
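/* Note (descriptive): the single RUNTIME_FUNCTION entry registered above spans
 * the whole chunk, so every translation block emitted into the chunk must use
 * the common prologue/epilogue that s_aOpcodes describes for the Windows stack
 * walker to unwind correctly. */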
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
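/* Example (illustrative): the routine above encodes -8 as the single byte 0x78
 * and 1000 as the two bytes 0xE8 0x07. */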
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
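/* Example (illustrative): the routine above encodes 300 as 0xAC 0x02 - low seven
 * bits first, bit 7 set on every byte except the last. */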
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
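/* Example (illustrative, assuming the standard SysV AMD64 DWARF numbering where
 * RBP is register 6 and DW_CFA_offset is 0x80): iemDwarfPutCfaOffset(Ptr, 6, 2)
 * emits the two bytes 0x86 0x02, i.e. "RBP is saved at CFA + 2*-8". */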
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symbols */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
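/* Layout recap (descriptive): abEhFrame now holds a 32-bit length prefixed CIE,
 * one FDE covering the entire chunk, and a zero terminator entry.  With
 * libunwind (darwin) only the FDE at offFda is registered via __register_frame;
 * elsewhere the whole block is handed to __register_frame_info together with
 * the abObject scratch area. */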
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be 64 byte aligned, so the first time through
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
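/* Recap (descriptive): in the RTHeapSimple configuration the code above makes a
 * 64 byte tweak allocation to compensate for the 64 byte heap anchor block, so
 * later allocations return 64 byte aligned user areas; request sizes are then
 * padded as RT_ALIGN_32(cb + 32, 64) - 32 so each block ends just before the
 * next 64 byte line where the following block header goes. */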
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
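/* Informal note on the bitmap sizing above (the actual IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT value is
 * defined elsewhere in this file; 8 is used here purely as an illustrative assumption): one bit per
 * 2^shift byte allocation unit and 8 bits per byte gives cbBitmap = cbChunk >> (shift + 3).  E.g. a
 * 16 MiB chunk with 256-byte units has 65536 units, i.e. an 8 KiB bitmap (1024 uint64_t elements). */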
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
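/* Informal usage sketch (hypothetical sizes, not taken from the original source): an EMT calls this
 * once before its first recompilation, e.g.
 *    int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0);
 * which sets up bookkeeping for up to 64 MiB in default-sized chunks and commits the first 16 MiB
 * up front; iemExecMemAllocatorGrow() then adds further chunks on demand until cMaxChunks is hit. */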
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#GP(0).
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#NM.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseDeviceNotAvailableJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise a \#UD.
1607 */
1608IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1609{
1610 iemRaiseUndefinedOpcodeJmp(pVCpu);
1611#ifndef _MSC_VER
1612 return VINF_IEM_RAISED_XCPT; /* not reached */
1613#endif
1614}
1615
1616
1617/**
1618 * Used by TB code when it wants to raise a \#MF.
1619 */
1620IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1621{
1622 iemRaiseMathFaultJmp(pVCpu);
1623#ifndef _MSC_VER
1624 return VINF_IEM_RAISED_XCPT; /* not reached */
1625#endif
1626}
1627
1628
1629/**
1630 * Used by TB code when it wants to raise a \#XF.
1631 */
1632IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1633{
1634 iemRaiseSimdFpExceptionJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when detecting opcode changes.
1643 * @see iemThreadeFuncWorkerObsoleteTb
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1646{
1647 /* We set fSafeToFree to false because we're being called in the context
1648 of a TB callback function, which for native TBs means we cannot release
1649 the executable memory until we've returned all the way back to iemTbExec,
1650 as that return path goes via the native code generated for the TB. */
1651 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1652 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1653 return VINF_IEM_REEXEC_BREAK;
1654}
1655
1656
1657/**
1658 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1659 */
1660IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1661{
1662 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1663 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1664 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1665 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1666 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1667 return VINF_IEM_REEXEC_BREAK;
1668}
1669
1670
1671/**
1672 * Used by TB code when we missed a PC check after a branch.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1675{
1676 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1677 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1678 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1679 pVCpu->iem.s.pbInstrBuf));
1680 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1681 return VINF_IEM_REEXEC_BREAK;
1682}
1683
1684
1685
1686/*********************************************************************************************************************************
1687* Helpers: Segmented memory fetches and stores. *
1688*********************************************************************************************************************************/
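/* Informal note on the pattern used by the helpers below (an interpretation, not from the original
 * source): when IEMNATIVE_WITH_TLB_LOOKUP_FETCH/STORE/PUSH/POP/MAPPED is defined the recompiled code
 * is expected to do the TLB lookup inline and only call these helpers on the slow path, hence the
 * *SafeJmp variants; otherwise the helpers call the regular *Jmp workers which do the full lookup
 * themselves. */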
1689
1690/**
1691 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1694{
1695#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1696 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1697#else
1698 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1699#endif
1700}
1701
1702
1703/**
1704 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1705 * to 16 bits.
1706 */
1707IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1708{
1709#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1710 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1711#else
1712 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1713#endif
1714}
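/* Informal note on the cast chains used by the sign-extending fetchers (not from the original
 * source): (int8_t) reinterprets the fetched byte as signed, the next signed cast widens it with
 * sign extension, and the final unsigned casts zero-extend the result to 64 bits.  E.g. a fetched
 * byte of 0x80 becomes 0x000000000000FF80 above, while 0x7F stays 0x000000000000007F. */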
1715
1716
1717/**
1718 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1719 * to 32 bits.
1720 */
1721IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1722{
1723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1724 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1725#else
1726 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1727#endif
1728}
1729
1730/**
1731 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1732 * to 64 bits.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1735{
1736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1737 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1738#else
1739 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1740#endif
1741}
1742
1743
1744/**
1745 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1746 */
1747IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1748{
1749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1750 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1751#else
1752 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1753#endif
1754}
1755
1756
1757/**
1758 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1759 * to 32 bits.
1760 */
1761IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1762{
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1764 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1765#else
1766 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1767#endif
1768}
1769
1770
1771/**
1772 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1773 * to 64 bits.
1774 */
1775IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1776{
1777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1778 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1779#else
1780 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1781#endif
1782}
1783
1784
1785/**
1786 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1787 */
1788IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1789{
1790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1791 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1792#else
1793 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1794#endif
1795}
1796
1797
1798/**
1799 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1800 * to 64 bits.
1801 */
1802IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1803{
1804#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1805 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1806#else
1807 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1808#endif
1809}
1810
1811
1812/**
1813 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1814 */
1815IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1816{
1817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1818 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1819#else
1820 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1821#endif
1822}
1823
1824
1825/**
1826 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1827 */
1828IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1829{
1830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1831 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1832#else
1833 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1834#endif
1835}
1836
1837
1838/**
1839 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1840 */
1841IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1842{
1843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1844 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1845#else
1846 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1847#endif
1848}
1849
1850
1851/**
1852 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1853 */
1854IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1855{
1856#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1857 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1858#else
1859 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1860#endif
1861}
1862
1863
1864/**
1865 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1870 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1871#else
1872 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1873#endif
1874}
1875
1876
1877
1878/**
1879 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1880 */
1881IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1882{
1883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1884 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1885#else
1886 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1887#endif
1888}
1889
1890
1891/**
1892 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1897 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1898#else
1899 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to store a 32-bit selector value onto a generic stack.
1906 *
1907 * Intel CPUs don't write a whole dword, hence the special function.
1908 */
1909IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1910{
1911#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1912 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1913#else
1914 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1915#endif
1916}
1917
1918
1919/**
1920 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1921 */
1922IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1923{
1924#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1925 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1926#else
1927 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1928#endif
1929}
1930
1931
1932/**
1933 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1934 */
1935IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1936{
1937#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1938 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1939#else
1940 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1941#endif
1942}
1943
1944
1945/**
1946 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1947 */
1948IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1949{
1950#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1951 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1952#else
1953 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1954#endif
1955}
1956
1957
1958/**
1959 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1960 */
1961IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1962{
1963#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1964 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1965#else
1966 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1967#endif
1968}
1969
1970
1971
1972/*********************************************************************************************************************************
1973* Helpers: Flat memory fetches and stores. *
1974*********************************************************************************************************************************/
1975
1976/**
1977 * Used by TB code to load unsigned 8-bit data w/ flat address.
1978 * @note Zero extending the value to 64-bit to simplify assembly.
1979 */
1980IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1981{
1982#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1983 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1984#else
1985 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1986#endif
1987}
1988
1989
1990/**
1991 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1992 * to 16 bits.
1993 * @note Zero extending the value to 64-bit to simplify assembly.
1994 */
1995IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1996{
1997#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1998 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1999#else
2000 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2001#endif
2002}
2003
2004
2005/**
2006 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2007 * to 32 bits.
2008 * @note Zero extending the value to 64-bit to simplify assembly.
2009 */
2010IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2011{
2012#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2013 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2014#else
2015 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2016#endif
2017}
2018
2019
2020/**
2021 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2022 * to 64 bits.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2025{
2026#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2027 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2028#else
2029 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2030#endif
2031}
2032
2033
2034/**
2035 * Used by TB code to load unsigned 16-bit data w/ flat address.
2036 * @note Zero extending the value to 64-bit to simplify assembly.
2037 */
2038IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2039{
2040#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2041 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2042#else
2043 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2044#endif
2045}
2046
2047
2048/**
2049 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2050 * to 32 bits.
2051 * @note Zero extending the value to 64-bit to simplify assembly.
2052 */
2053IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2054{
2055#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2056 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2057#else
2058 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2059#endif
2060}
2061
2062
2063/**
2064 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2065 * to 64 bits.
2066 * @note Zero extending the value to 64-bit to simplify assembly.
2067 */
2068IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2069{
2070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2071 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2072#else
2073 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2074#endif
2075}
2076
2077
2078/**
2079 * Used by TB code to load unsigned 32-bit data w/ flat address.
2080 * @note Zero extending the value to 64-bit to simplify assembly.
2081 */
2082IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2083{
2084#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2085 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2086#else
2087 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2088#endif
2089}
2090
2091
2092/**
2093 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2094 * to 64 bits.
2095 * @note Zero extending the value to 64-bit to simplify assembly.
2096 */
2097IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2098{
2099#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2100 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2101#else
2102 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2103#endif
2104}
2105
2106
2107/**
2108 * Used by TB code to load unsigned 64-bit data w/ flat address.
2109 */
2110IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2111{
2112#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2113 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2114#else
2115 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2116#endif
2117}
2118
2119
2120/**
2121 * Used by TB code to store unsigned 8-bit data w/ flat address.
2122 */
2123IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2124{
2125#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2126 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2127#else
2128 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2129#endif
2130}
2131
2132
2133/**
2134 * Used by TB code to store unsigned 16-bit data w/ flat address.
2135 */
2136IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2137{
2138#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2139 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2140#else
2141 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2142#endif
2143}
2144
2145
2146/**
2147 * Used by TB code to store unsigned 32-bit data w/ flat address.
2148 */
2149IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2150{
2151#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2152 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2153#else
2154 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2155#endif
2156}
2157
2158
2159/**
2160 * Used by TB code to store unsigned 64-bit data w/ flat address.
2161 */
2162IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2163{
2164#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2165 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2166#else
2167 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2168#endif
2169}
2170
2171
2172
2173/**
2174 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2175 */
2176IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2177{
2178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2179 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2180#else
2181 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2182#endif
2183}
2184
2185
2186/**
2187 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2188 */
2189IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2190{
2191#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2192 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2193#else
2194 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2195#endif
2196}
2197
2198
2199/**
2200 * Used by TB code to store a segment selector value onto a flat stack.
2201 *
2202 * Intel CPUs don't write a whole dword, hence the special function.
2203 */
2204IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2205{
2206#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2207 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2208#else
2209 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2210#endif
2211}
2212
2213
2214/**
2215 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2216 */
2217IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2218{
2219#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2220 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2221#else
2222 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2223#endif
2224}
2225
2226
2227/**
2228 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2229 */
2230IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2231{
2232#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2233 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2234#else
2235 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2236#endif
2237}
2238
2239
2240/**
2241 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2242 */
2243IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2244{
2245#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2246 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2247#else
2248 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2249#endif
2250}
2251
2252
2253/**
2254 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2255 */
2256IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2257{
2258#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2259 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2260#else
2261 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2262#endif
2263}
2264
2265
2266
2267/*********************************************************************************************************************************
2268* Helpers: Segmented memory mapping. *
2269*********************************************************************************************************************************/
2270
2271/**
2272 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2273 * segmentation.
2274 */
2275IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2276 RTGCPTR GCPtrMem, uint8_t iSegReg))
2277{
2278#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2279 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2280#else
2281 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2282#endif
2283}
2284
2285
2286/**
2287 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2288 */
2289IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2290 RTGCPTR GCPtrMem, uint8_t iSegReg))
2291{
2292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2293 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2294#else
2295 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2296#endif
2297}
2298
2299
2300/**
2301 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2302 */
2303IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2304 RTGCPTR GCPtrMem, uint8_t iSegReg))
2305{
2306#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2307 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2308#else
2309 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2310#endif
2311}
2312
2313
2314/**
2315 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2316 */
2317IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2318 RTGCPTR GCPtrMem, uint8_t iSegReg))
2319{
2320#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2321 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2322#else
2323 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2324#endif
2325}
2326
2327
2328/**
2329 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2330 * segmentation.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2333 RTGCPTR GCPtrMem, uint8_t iSegReg))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2336 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2337#else
2338 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2339#endif
2340}
2341
2342
2343/**
2344 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2345 */
2346IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2347 RTGCPTR GCPtrMem, uint8_t iSegReg))
2348{
2349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2350 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2351#else
2352 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2353#endif
2354}
2355
2356
2357/**
2358 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2359 */
2360IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2361 RTGCPTR GCPtrMem, uint8_t iSegReg))
2362{
2363#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2364 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2365#else
2366 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2367#endif
2368}
2369
2370
2371/**
2372 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2373 */
2374IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2375 RTGCPTR GCPtrMem, uint8_t iSegReg))
2376{
2377#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2378 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2379#else
2380 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2381#endif
2382}
2383
2384
2385/**
2386 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2387 * segmentation.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2390 RTGCPTR GCPtrMem, uint8_t iSegReg))
2391{
2392#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2393 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2394#else
2395 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2396#endif
2397}
2398
2399
2400/**
2401 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2402 */
2403IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2404 RTGCPTR GCPtrMem, uint8_t iSegReg))
2405{
2406#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2407 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2408#else
2409 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2410#endif
2411}
2412
2413
2414/**
2415 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2416 */
2417IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2418 RTGCPTR GCPtrMem, uint8_t iSegReg))
2419{
2420#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2421 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2422#else
2423 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2424#endif
2425}
2426
2427
2428/**
2429 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2430 */
2431IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2432 RTGCPTR GCPtrMem, uint8_t iSegReg))
2433{
2434#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2435 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2436#else
2437 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2438#endif
2439}
2440
2441
2442/**
2443 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2444 * segmentation.
2445 */
2446IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2447 RTGCPTR GCPtrMem, uint8_t iSegReg))
2448{
2449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2450 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2451#else
2452 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2453#endif
2454}
2455
2456
2457/**
2458 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2459 */
2460IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2461 RTGCPTR GCPtrMem, uint8_t iSegReg))
2462{
2463#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2464 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2465#else
2466 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2467#endif
2468}
2469
2470
2471/**
2472 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2473 */
2474IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2475 RTGCPTR GCPtrMem, uint8_t iSegReg))
2476{
2477#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2478 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2479#else
2480 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2481#endif
2482}
2483
2484
2485/**
2486 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2487 */
2488IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2489 RTGCPTR GCPtrMem, uint8_t iSegReg))
2490{
2491#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2492 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2493#else
2494 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2495#endif
2496}
2497
2498
2499/**
2500 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2501 */
2502IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2503 RTGCPTR GCPtrMem, uint8_t iSegReg))
2504{
2505#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2506 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2507#else
2508 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2509#endif
2510}
2511
2512
2513/**
2514 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2515 */
2516IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2517 RTGCPTR GCPtrMem, uint8_t iSegReg))
2518{
2519#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2520 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2521#else
2522 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2523#endif
2524}
2525
2526
2527/**
2528 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2529 * segmentation.
2530 */
2531IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2532 RTGCPTR GCPtrMem, uint8_t iSegReg))
2533{
2534#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2535 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2536#else
2537 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2538#endif
2539}
2540
2541
2542/**
2543 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2544 */
2545IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2546 RTGCPTR GCPtrMem, uint8_t iSegReg))
2547{
2548#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2549 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2550#else
2551 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2552#endif
2553}
2554
2555
2556/**
2557 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2558 */
2559IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2560 RTGCPTR GCPtrMem, uint8_t iSegReg))
2561{
2562#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2563 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2564#else
2565 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2566#endif
2567}
2568
2569
2570/**
2571 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2572 */
2573IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2574 RTGCPTR GCPtrMem, uint8_t iSegReg))
2575{
2576#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2577 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2578#else
2579 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2580#endif
2581}
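/* Informal naming note for the mapping helpers (an interpretation, not from the original source):
 * Atomic = atomic read-write, Rw = read-write, Wo = write-only, Ro = read-only.  The byte written
 * to *pbUnmapInfo is the token later handed to the iemNativeHlpMemCommitAndUnmap* helpers further
 * down as bUnmapInfo. */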
2582
2583
2584/*********************************************************************************************************************************
2585* Helpers: Flat memory mapping. *
2586*********************************************************************************************************************************/
2587
2588/**
2589 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2590 * address.
2591 */
2592IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2593{
2594#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2595 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2596#else
2597 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2598#endif
2599}
2600
2601
2602/**
2603 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2604 */
2605IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2606{
2607#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2608 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2609#else
2610 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2611#endif
2612}
2613
2614
2615/**
2616 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2617 */
2618IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2619{
2620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2621 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2622#else
2623 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2624#endif
2625}
2626
2627
2628/**
2629 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2630 */
2631IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2632{
2633#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2634 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2635#else
2636 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2637#endif
2638}
2639
2640
2641/**
2642 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2643 * address.
2644 */
2645IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2646{
2647#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2648 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2649#else
2650 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2651#endif
2652}
2653
2654
2655/**
2656 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2657 */
2658IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2659{
2660#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2661 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2662#else
2663 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2664#endif
2665}
2666
2667
2668/**
2669 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2670 */
2671IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2672{
2673#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2674 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2675#else
2676 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2677#endif
2678}
2679
2680
2681/**
2682 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2683 */
2684IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2685{
2686#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2687 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2688#else
2689 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2690#endif
2691}
2692
2693
2694/**
2695 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2696 * address.
2697 */
2698IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2699{
2700#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2701 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2702#else
2703 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2704#endif
2705}
2706
2707
2708/**
2709 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2710 */
2711IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2712{
2713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2714 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2715#else
2716 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2717#endif
2718}
2719
2720
2721/**
2722 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2723 */
2724IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2725{
2726#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2727 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2728#else
2729 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2730#endif
2731}
2732
2733
2734/**
2735 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2736 */
2737IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2738{
2739#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2740 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2741#else
2742 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2743#endif
2744}
2745
2746
2747/**
2748 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2749 * address.
2750 */
2751IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2752{
2753#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2754 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2755#else
2756 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2757#endif
2758}
2759
2760
2761/**
2762 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2763 */
2764IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2765{
2766#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2767 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2768#else
2769 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2770#endif
2771}
2772
2773
2774/**
2775 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2776 */
2777IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2778{
2779#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2780 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2781#else
2782 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2783#endif
2784}
2785
2786
2787/**
2788 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2789 */
2790IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2791{
2792#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2793 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2794#else
2795 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2796#endif
2797}
2798
2799
2800/**
2801 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2802 */
2803IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2804{
2805#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2806 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2807#else
2808 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2809#endif
2810}
2811
2812
2813/**
2814 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2815 */
2816IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2817{
2818#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2819 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2820#else
2821 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2822#endif
2823}
2824
2825
2826/**
2827 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2828 * address.
2829 */
2830IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2831{
2832#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2833 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2834#else
2835 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2836#endif
2837}
2838
2839
2840/**
2841 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2842 */
2843IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2844{
2845#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2846 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2847#else
2848 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2849#endif
2850}
2851
2852
2853/**
2854 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2855 */
2856IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2857{
2858#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2859 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2860#else
2861 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2862#endif
2863}
2864
2865
2866/**
2867 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2868 */
2869IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2870{
2871#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2872 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2873#else
2874 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2875#endif
2876}
2877
2878
2879/*********************************************************************************************************************************
2880* Helpers: Commit, rollback & unmap *
2881*********************************************************************************************************************************/
2882
2883/**
2884 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2885 */
2886IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2887{
2888 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2889}
2890
2891
2892/**
2893 * Used by TB code to commit and unmap a read-write memory mapping.
2894 */
2895IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2896{
2897 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2898}
2899
2900
2901/**
2902 * Used by TB code to commit and unmap a write-only memory mapping.
2903 */
2904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2905{
2906 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2907}
2908
2909
2910/**
2911 * Used by TB code to commit and unmap a read-only memory mapping.
2912 */
2913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2914{
2915 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2916}
2917
2918
2919/**
2920 * Reinitializes the native recompiler state.
2921 *
2922 * Called before starting a new recompile job.
2923 */
2924static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2925{
2926 pReNative->cLabels = 0;
2927 pReNative->bmLabelTypes = 0;
2928 pReNative->cFixups = 0;
2929#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2930 pReNative->pDbgInfo->cEntries = 0;
2931#endif
2932 pReNative->pTbOrg = pTb;
2933 pReNative->cCondDepth = 0;
2934 pReNative->uCondSeqNo = 0;
2935 pReNative->uCheckIrqSeqNo = 0;
2936 pReNative->uTlbSeqNo = 0;
2937
2938#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2939 pReNative->Core.offPc = 0;
2940 pReNative->Core.cInstrPcUpdateSkipped = 0;
2941#endif
2942 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2943#if IEMNATIVE_HST_GREG_COUNT < 32
2944 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2945#endif
2946 ;
2947 pReNative->Core.bmHstRegsWithGstShadow = 0;
2948 pReNative->Core.bmGstRegShadows = 0;
2949 pReNative->Core.bmVars = 0;
2950 pReNative->Core.bmStack = 0;
2951 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2952 pReNative->Core.u64ArgVars = UINT64_MAX;
2953
2954 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2955 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2956 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2957 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2958 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2959 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2960 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2961 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2962 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2963 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2964 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2965 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2968
2969 /* Full host register reinit: */
2970 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2971 {
2972 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2973 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2974 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2975 }
2976
2977 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2978 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2979#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2980 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2981#endif
2982#ifdef IEMNATIVE_REG_FIXED_TMP0
2983 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2984#endif
2985#ifdef IEMNATIVE_REG_FIXED_TMP1
2986 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2987#endif
2988#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2989 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2990#endif
2991 );
2992 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2993 {
2994 fRegs &= ~RT_BIT_32(idxReg);
2995 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2996 }
2997
2998 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2999#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3000 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3001#endif
3002#ifdef IEMNATIVE_REG_FIXED_TMP0
3003 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3004#endif
3005#ifdef IEMNATIVE_REG_FIXED_TMP1
3006 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3007#endif
3008#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3009 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3010#endif
3011
3012#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3013# ifdef RT_ARCH_ARM64
3014 /*
3015 * Arm64 only has 32 128-bit registers. In order to support emulating 256-bit registers we statically
3016 * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3017 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register of each pair as fixed
3018 * here during init; the register allocator assumes it is always free when the lower one is picked.
3019 */
3020 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3021# else
3022 uint32_t const fFixedAdditional = 0;
3023# endif
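/* Informal note (not from the original source): 0xaaaaaaaa is binary 1010...1010, i.e. bits
 * 1, 3, 5, ..., 31, which marks the odd-numbered (higher) register of every v(2n)/v(2n+1) pair
 * as fixed per the pairing scheme described above. */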
3024
3025 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3026 | fFixedAdditional
3027# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3028 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3029# endif
3030 ;
3031 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3032 pReNative->Core.bmGstSimdRegShadows = 0;
3033 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3034 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3035
3036 /* Full host register reinit: */
3037 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3038 {
3039 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3040 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3041 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3042 }
3043
3044 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3045 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3046 {
3047 fRegs &= ~RT_BIT_32(idxReg);
3048 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3049 }
3050
3051#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3052 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3053#endif
3054
3055#endif
3056
3057 return pReNative;
3058}
3059
3060
3061/**
3062 * Allocates and initializes the native recompiler state.
3063 *
3064 * This is called the first time an EMT wants to recompile something.
3065 *
3066 * @returns Pointer to the new recompiler state.
3067 * @param pVCpu The cross context virtual CPU structure of the calling
3068 * thread.
3069 * @param pTb The TB that's about to be recompiled.
3070 * @thread EMT(pVCpu)
3071 */
3072static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3073{
3074 VMCPU_ASSERT_EMT(pVCpu);
3075
3076 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3077 AssertReturn(pReNative, NULL);
3078
3079 /*
3080 * Try allocate all the buffers and stuff we need.
3081 */
3082 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3083 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3084 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3085#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3086 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3087#endif
3088 if (RT_LIKELY( pReNative->pInstrBuf
3089 && pReNative->paLabels
3090 && pReNative->paFixups)
3091#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3092 && pReNative->pDbgInfo
3093#endif
3094 )
3095 {
3096 /*
3097 * Set the buffer & array sizes on success.
3098 */
3099 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3100 pReNative->cLabelsAlloc = _8K;
3101 pReNative->cFixupsAlloc = _16K;
3102#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3103 pReNative->cDbgInfoAlloc = _16K;
3104#endif
3105
3106 /* Other constant stuff: */
3107 pReNative->pVCpu = pVCpu;
3108
3109 /*
3110 * Done, just need to save it and reinit it.
3111 */
3112 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3113 return iemNativeReInit(pReNative, pTb);
3114 }
3115
3116 /*
3117 * Failed. Cleanup and return.
3118 */
3119 AssertFailed();
3120 RTMemFree(pReNative->pInstrBuf);
3121 RTMemFree(pReNative->paLabels);
3122 RTMemFree(pReNative->paFixups);
3123#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3124 RTMemFree(pReNative->pDbgInfo);
3125#endif
3126 RTMemFree(pReNative);
3127 return NULL;
3128}
3129
3130
3131/**
3132 * Creates a label
3133 *
3134 * If the label does not yet have a defined position,
3135 * call iemNativeLabelDefine() later to set it.
3136 *
3137 * @returns Label ID. Throws VBox status code on failure, so no need to check
3138 * the return value.
3139 * @param pReNative The native recompile state.
3140 * @param enmType The label type.
3141 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3142 * label is not yet defined (default).
3143 * @param uData Data associated with the lable. Only applicable to
3144 * certain type of labels. Default is zero.
3145 */
3146DECL_HIDDEN_THROW(uint32_t)
3147iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3148 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3149{
3150 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3151
3152 /*
3153 * Locate existing label definition.
3154 *
3155 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3156 * and uData is zero.
3157 */
3158 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3159 uint32_t const cLabels = pReNative->cLabels;
3160 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3161#ifndef VBOX_STRICT
3162 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3163 && offWhere == UINT32_MAX
3164 && uData == 0
3165#endif
3166 )
3167 {
3168#ifndef VBOX_STRICT
3169 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3170 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3171 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3172 if (idxLabel < pReNative->cLabels)
3173 return idxLabel;
3174#else
3175 for (uint32_t i = 0; i < cLabels; i++)
3176 if ( paLabels[i].enmType == enmType
3177 && paLabels[i].uData == uData)
3178 {
3179 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3180 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3181 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3182 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3183 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3184 return i;
3185 }
3186 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3187 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3188#endif
3189 }
3190
3191 /*
3192 * Make sure we've got room for another label.
3193 */
3194 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3195 { /* likely */ }
3196 else
3197 {
3198 uint32_t cNew = pReNative->cLabelsAlloc;
3199 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3200 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3201 cNew *= 2;
3202 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3203 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3204 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3205 pReNative->paLabels = paLabels;
3206 pReNative->cLabelsAlloc = cNew;
3207 }
3208
3209 /*
3210 * Define a new label.
3211 */
3212 paLabels[cLabels].off = offWhere;
3213 paLabels[cLabels].enmType = enmType;
3214 paLabels[cLabels].uData = uData;
3215 pReNative->cLabels = cLabels + 1;
3216
3217 Assert((unsigned)enmType < 64);
3218 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3219
3220 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3221 {
3222 Assert(uData == 0);
3223 pReNative->aidxUniqueLabels[enmType] = cLabels;
3224 }
3225
3226 if (offWhere != UINT32_MAX)
3227 {
3228#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3229 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3230 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3231#endif
3232 }
3233 return cLabels;
3234}
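/*
 * Usage sketch for the label/fixup API in this file (illustrative only; names like enmLabelType,
 * enmFixupType and offBranch are placeholders, not taken from a specific call site):
 *
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);   // forward declaration
 *     ... emit the branch instruction, then record its location with
 *         iemNativeAddFixup(pReNative, offBranch, idxLabel, enmFixupType); ...
 *     iemNativeLabelDefine(pReNative, idxLabel, off);    // bind the label once the target offset is known
 */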
3235
3236
3237/**
3238 * Defines the location of an existing label.
3239 *
3240 * @param pReNative The native recompile state.
3241 * @param idxLabel The label to define.
3242 * @param offWhere The position.
3243 */
3244DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3245{
3246 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3247 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3248 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3249 pLabel->off = offWhere;
3250#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3251 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3252 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3253#endif
3254}
3255
3256
3257/**
3258 * Looks up a label.
3259 *
3260 * @returns Label ID if found, UINT32_MAX if not.
3261 */
3262static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3263 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3264{
3265 Assert((unsigned)enmType < 64);
3266 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3267 {
3268 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3269 return pReNative->aidxUniqueLabels[enmType];
3270
3271 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3272 uint32_t const cLabels = pReNative->cLabels;
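 /* Note: an entry matches when the requested and recorded offsets are equal, or when
    either of them is still unspecified (UINT32_MAX). */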
3273 for (uint32_t i = 0; i < cLabels; i++)
3274 if ( paLabels[i].enmType == enmType
3275 && paLabels[i].uData == uData
3276 && ( paLabels[i].off == offWhere
3277 || offWhere == UINT32_MAX
3278 || paLabels[i].off == UINT32_MAX))
3279 return i;
3280 }
3281 return UINT32_MAX;
3282}
3283
3284
3285/**
3286 * Adds a fixup.
3287 *
3288 * @throws VBox status code (int) on failure.
3289 * @param pReNative The native recompile state.
3290 * @param offWhere The instruction offset of the fixup location.
3291 * @param idxLabel The target label ID for the fixup.
3292 * @param enmType The fixup type.
3293 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3294 */
3295DECL_HIDDEN_THROW(void)
3296iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3297 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3298{
3299 Assert(idxLabel <= UINT16_MAX);
3300 Assert((unsigned)enmType <= UINT8_MAX);
3301
3302 /*
3303 * Make sure we've got room.
3304 */
3305 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3306 uint32_t const cFixups = pReNative->cFixups;
3307 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3308 { /* likely */ }
3309 else
3310 {
3311 uint32_t cNew = pReNative->cFixupsAlloc;
3312 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3313 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3314 cNew *= 2;
3315 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3316 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3317 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3318 pReNative->paFixups = paFixups;
3319 pReNative->cFixupsAlloc = cNew;
3320 }
3321
3322 /*
3323 * Add the fixup.
3324 */
3325 paFixups[cFixups].off = offWhere;
3326 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3327 paFixups[cFixups].enmType = enmType;
3328 paFixups[cFixups].offAddend = offAddend;
3329 pReNative->cFixups = cFixups + 1;
3330}
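/* Note: fixups recorded here are not patched immediately; they are resolved against the final
   label offsets when the recompiled code is emitted into its executable buffer. */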
3331
3332
3333/**
3334 * Slow code path for iemNativeInstrBufEnsure.
3335 */
3336DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3337{
3338 /* Double the buffer size till we meet the request. */
3339 uint32_t cNew = pReNative->cInstrBufAlloc;
3340 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3341 do
3342 cNew *= 2;
3343 while (cNew < off + cInstrReq);
3344
3345 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3346#ifdef RT_ARCH_ARM64
3347 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
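 /* I.e. a conditional branch encodes a signed number of instructions (18 bits + sign) scaled
    by the 4 byte instruction size, giving a reach of +/- 1 MB. */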
3348#else
3349 uint32_t const cbMaxInstrBuf = _2M;
3350#endif
3351 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3352
3353 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3354 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3355
3356#ifdef VBOX_STRICT
3357 pReNative->offInstrBufChecked = off + cInstrReq;
3358#endif
3359 pReNative->cInstrBufAlloc = cNew;
3360 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3361}
3362
3363#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3364
3365/**
3366 * Grows the static debug info array used during recompilation.
3367 *
3368 * @returns Pointer to the new debug info block; throws VBox status code on
3369 * failure, so no need to check the return value.
3370 */
3371DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3372{
3373 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3374 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3375 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3376 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3377 pReNative->pDbgInfo = pDbgInfo;
3378 pReNative->cDbgInfoAlloc = cNew;
3379 return pDbgInfo;
3380}
3381
3382
3383/**
3384 * Adds a new uninitialized debug info entry, returning the pointer to it.
3385 */
3386DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3387{
3388 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3389 { /* likely */ }
3390 else
3391 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3392 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3393}
3394
3395
3396/**
3397 * Debug Info: Adds a native offset record, if necessary.
3398 */
3399DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3400{
3401 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3402
3403 /*
3404 * Search backwards to see if we've got a similar record already.
3405 */
3406 uint32_t idx = pDbgInfo->cEntries;
3407 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
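 /* Bounded backwards scan: native offsets are only added in ascending order, so only the most
    recent native offset record (here within the last 8 entries) can be a duplicate of 'off'. */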
3408 while (idx-- > idxStop)
3409 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3410 {
3411 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3412 return;
3413 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3414 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3415 break;
3416 }
3417
3418 /*
3419 * Add it.
3420 */
3421 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3422 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3423 pEntry->NativeOffset.offNative = off;
3424}
3425
3426
3427/**
3428 * Debug Info: Record info about a label.
3429 */
3430static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3431{
3432 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3433 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3434 pEntry->Label.uUnused = 0;
3435 pEntry->Label.enmLabel = (uint8_t)enmType;
3436 pEntry->Label.uData = uData;
3437}
3438
3439
3440/**
3441 * Debug Info: Record info about a threaded call.
3442 */
3443static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3444{
3445 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3446 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3447 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3448 pEntry->ThreadedCall.uUnused = 0;
3449 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3450}
3451
3452
3453/**
3454 * Debug Info: Record info about a new guest instruction.
3455 */
3456static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3457{
3458 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3459 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3460 pEntry->GuestInstruction.uUnused = 0;
3461 pEntry->GuestInstruction.fExec = fExec;
3462}
3463
3464
3465/**
3466 * Debug Info: Record info about guest register shadowing.
3467 */
3468DECL_HIDDEN_THROW(void)
3469iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3470 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3471{
3472 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3473 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3474 pEntry->GuestRegShadowing.uUnused = 0;
3475 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3476 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3477 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3478}
3479
3480
3481# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3482/**
3483 * Debug Info: Record info about guest SIMD register shadowing.
3484 */
3485DECL_HIDDEN_THROW(void)
3486iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3487 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3488{
3489 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3490 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3491 pEntry->GuestSimdRegShadowing.uUnused = 0;
3492 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3493 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3494 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3495}
3496# endif
3497
3498
3499# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3500/**
3501 * Debug Info: Record info about delayed RIP updates.
3502 */
3503DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3504{
3505 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3506 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3507 pEntry->DelayedPcUpdate.offPc = offPc;
3508 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3509}
3510# endif
3511
3512#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3513
3514
3515/*********************************************************************************************************************************
3516* Register Allocator *
3517*********************************************************************************************************************************/
3518
3519/**
3520 * Register parameter indexes (indexed by argument number).
3521 */
3522DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3523{
3524 IEMNATIVE_CALL_ARG0_GREG,
3525 IEMNATIVE_CALL_ARG1_GREG,
3526 IEMNATIVE_CALL_ARG2_GREG,
3527 IEMNATIVE_CALL_ARG3_GREG,
3528#if defined(IEMNATIVE_CALL_ARG4_GREG)
3529 IEMNATIVE_CALL_ARG4_GREG,
3530# if defined(IEMNATIVE_CALL_ARG5_GREG)
3531 IEMNATIVE_CALL_ARG5_GREG,
3532# if defined(IEMNATIVE_CALL_ARG6_GREG)
3533 IEMNATIVE_CALL_ARG6_GREG,
3534# if defined(IEMNATIVE_CALL_ARG7_GREG)
3535 IEMNATIVE_CALL_ARG7_GREG,
3536# endif
3537# endif
3538# endif
3539#endif
3540};
3541AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3542
3543/**
3544 * Call register masks indexed by argument count.
3545 */
3546DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3547{
3548 0,
3549 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3550 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3551 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3552 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3553 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3554#if defined(IEMNATIVE_CALL_ARG4_GREG)
3555 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3556 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3557# if defined(IEMNATIVE_CALL_ARG5_GREG)
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3559 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3560# if defined(IEMNATIVE_CALL_ARG6_GREG)
3561 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3562 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3563 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3564# if defined(IEMNATIVE_CALL_ARG7_GREG)
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3567 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3568# endif
3569# endif
3570# endif
3571#endif
3572};
3573
3574#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3575/**
3576 * BP offset of the stack argument slots.
3577 *
3578 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3579 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3580 */
3581DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3582{
3583 IEMNATIVE_FP_OFF_STACK_ARG0,
3584# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3585 IEMNATIVE_FP_OFF_STACK_ARG1,
3586# endif
3587# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3588 IEMNATIVE_FP_OFF_STACK_ARG2,
3589# endif
3590# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3591 IEMNATIVE_FP_OFF_STACK_ARG3,
3592# endif
3593};
3594AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3595#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3596
3597/**
3598 * Info about shadowed guest register values.
3599 * @see IEMNATIVEGSTREG
3600 */
3601DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3602{
3603#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3604 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3605 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3606 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3607 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3608 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3609 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3610 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3611 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3612 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3613 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3614 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3615 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3616 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3617 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3618 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3620 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3621 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3622 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3623 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3624 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3625 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3626 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3627 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3628 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3629 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3630 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3631 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3632 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3633 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3634 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3635 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3636 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3637 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3638 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3639 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3640 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3641 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3642 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3643 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3644 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3645 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3646 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3647 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3648 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3649 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3650 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3651 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3652#undef CPUMCTX_OFF_AND_SIZE
3653};
3654AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3655
3656
3657/** Host CPU general purpose register names. */
3658DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3659{
3660#ifdef RT_ARCH_AMD64
3661 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3662#elif defined(RT_ARCH_ARM64)
3663 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3664 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3665#else
3666# error "port me"
3667#endif
3668};
3669
3670
3671#if 0 /* unused */
3672/**
3673 * Tries to locate a suitable register in the given register mask.
3674 *
3675 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3676 * failed.
3677 *
3678 * @returns Host register number on success, returns UINT8_MAX on failure.
3679 */
3680static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3681{
3682 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3683 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3684 if (fRegs)
3685 {
3686 /** @todo pick better here: */
3687 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3688
3689 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3690 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3691 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3692 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3693
3694 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3695 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3696 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3697 return idxReg;
3698 }
3699 return UINT8_MAX;
3700}
3701#endif /* unused */
3702
3703
3704/**
3705 * Locate a register, possibly freeing one up.
3706 *
3707 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3708 * failed.
3709 *
3710 * @returns Host register number on success. Returns UINT8_MAX if no register is
3711 * found; the caller is supposed to deal with this and raise an
3712 * allocation type specific status code (if desired).
3713 *
3714 * @throws VBox status code if we run into trouble spilling a variable or
3715 * recording debug info. Does NOT throw anything if we're out of
3716 * registers, though.
3717 */
3718static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3719 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3720{
3721 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3722 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3723 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3724
3725 /*
3726 * Try a free register that's shadowing a guest register.
3727 */
3728 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3729 if (fRegs)
3730 {
3731 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3732
3733#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3734 /*
3735 * When we have liveness information, we use it to kick out all shadowed
3736 * guest registers that will not be needed any more in this TB. If we're
3737 * lucky, this may prevent us from ending up here again.
3738 *
3739 * Note! We must consider the previous entry here so we don't free
3740 * anything that the current threaded function requires (current
3741 * entry is produced by the next threaded function).
3742 */
3743 uint32_t const idxCurCall = pReNative->idxCurCall;
3744 if (idxCurCall > 0)
3745 {
3746 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3747
3748# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3749 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3750 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3751 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
3752# else
3753 /* Construct a mask of the registers not in the read or write state.
3754 Note! We could skip writes, if they aren't from us, as this is just
3755 a hack to prevent trashing registers that have just been written
3756 or will be written when we retire the current instruction. */
3757 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3758 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3759 & IEMLIVENESSBIT_MASK;
3760# endif
3761 /* Merge EFLAGS. */
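 /* The liveness bitmap tracks EFLAGS as seven separate groups (other, CF, PF, AF, ZF,
    SF, OF) in consecutive bits starting at the kIemNativeGstReg_EFlags position; the
    shift-and-AND cascade below folds them into that single bit, so EFLAGS is only
    considered freeable when every one of the groups is unused. */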
3762 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3763 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3764 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3765 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3766 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3767
3768 /* If it matches any shadowed registers. */
3769 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3770 {
3771 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3772 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3773 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3774
3775 /* See if we've got any unshadowed registers we can return now. */
3776 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3777 if (fUnshadowedRegs)
3778 {
3779 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3780 return (fPreferVolatile
3781 ? ASMBitFirstSetU32(fUnshadowedRegs)
3782 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3783 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3784 - 1;
3785 }
3786 }
3787 }
3788#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3789
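 /* When fPreferVolatile is set we simply take the lowest numbered candidate; otherwise we
    prefer the highest register outside IEMNATIVE_CALL_VOLATILE_GREG_MASK (falling back to
    any candidate) so the value is more likely to survive helper calls. */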
3790 unsigned const idxReg = (fPreferVolatile
3791 ? ASMBitFirstSetU32(fRegs)
3792 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3793 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3794 - 1;
3795
3796 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3797 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3798 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3799 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3800
3801 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3802 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3803 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3804 return idxReg;
3805 }
3806
3807 /*
3808 * Try free up a variable that's in a register.
3809 *
3810 * We do two rounds here: first evacuating variables that don't need to be
3811 * saved on the stack, then in the second round moving things to the stack.
3812 */
3813 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3814 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3815 {
3816 uint32_t fVars = pReNative->Core.bmVars;
3817 while (fVars)
3818 {
3819 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3820 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3821 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3822 && (RT_BIT_32(idxReg) & fRegMask)
3823 && ( iLoop == 0
3824 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3825 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3826 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3827 {
3828 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3829 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3830 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3831 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3832 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3833 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3834
3835 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3836 {
3837 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3838 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3839 }
3840
3841 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3842 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3843
3844 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3845 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3846 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3847 return idxReg;
3848 }
3849 fVars &= ~RT_BIT_32(idxVar);
3850 }
3851 }
3852
3853 return UINT8_MAX;
3854}
3855
3856
3857/**
3858 * Reassigns a variable to a different register specified by the caller.
3859 *
3860 * @returns The new code buffer position.
3861 * @param pReNative The native recompile state.
3862 * @param off The current code buffer position.
3863 * @param idxVar The variable index.
3864 * @param idxRegOld The old host register number.
3865 * @param idxRegNew The new host register number.
3866 * @param pszCaller The caller for logging.
3867 */
3868static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3869 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3870{
3871 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3872 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3873 RT_NOREF(pszCaller);
3874
3875 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3876
3877 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3878 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3879 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3880 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3881
3882 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3883 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3884 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3885 if (fGstRegShadows)
3886 {
3887 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3888 | RT_BIT_32(idxRegNew);
3889 while (fGstRegShadows)
3890 {
3891 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3892 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3893
3894 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3895 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3896 }
3897 }
3898
3899 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3900 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3901 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3902 return off;
3903}
3904
3905
3906/**
3907 * Moves a variable to a different register or spills it onto the stack.
3908 *
3909 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3910 * kinds can easily be recreated if needed later.
3911 *
3912 * @returns The new code buffer position.
3913 * @param pReNative The native recompile state.
3914 * @param off The current code buffer position.
3915 * @param idxVar The variable index.
3916 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3917 * call-volatile registers.
3918 */
3919DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3920 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3921{
3922 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3923 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3924 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3925 Assert(!pVar->fRegAcquired);
3926
3927 uint8_t const idxRegOld = pVar->idxReg;
3928 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3929 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3930 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3931 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3932 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3933 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3934 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3935 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3936
3937
3938 /** @todo Add statistics on this.*/
3939 /** @todo Implement basic variable liveness analysis (python) so variables
3940 * can be freed immediately once no longer used. Otherwise we risk
3941 * trashing registers and stack slots for dead variables.
3942 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3943
3944 /*
3945 * First try move it to a different register, as that's cheaper.
3946 */
3947 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3948 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3949 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3950 if (fRegs)
3951 {
3952 /* Avoid using shadow registers, if possible. */
3953 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3954 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3955 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3956 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3957 }
3958
3959 /*
3960 * Otherwise we must spill the register onto the stack.
3961 */
3962 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3963 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3964 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3965 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3966
3967 pVar->idxReg = UINT8_MAX;
3968 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3969 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3970 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3971 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3972 return off;
3973}
3974
3975
3976/**
3977 * Allocates a temporary host general purpose register.
3978 *
3979 * This may emit code to save register content onto the stack in order to free
3980 * up a register.
3981 *
3982 * @returns The host register number; throws VBox status code on failure,
3983 * so no need to check the return value.
3984 * @param pReNative The native recompile state.
3985 * @param poff Pointer to the variable with the code buffer position.
3986 * This will be updated if we need to move a variable from
3987 * register to stack in order to satisfy the request.
3988 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3989 * registers (@c true, default) or the other way around
3990 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3991 */
3992DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3993{
3994 /*
3995 * Try find a completely unused register, preferably a call-volatile one.
3996 */
3997 uint8_t idxReg;
3998 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3999 & ~pReNative->Core.bmHstRegsWithGstShadow
4000 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4001 if (fRegs)
4002 {
4003 if (fPreferVolatile)
4004 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4005 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4006 else
4007 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4008 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4009 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4010 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4011 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4012 }
4013 else
4014 {
4015 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4016 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4017 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4018 }
4019 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4020}
4021
4022
4023/**
4024 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4025 * registers.
4026 *
4027 * @returns The host register number; throws VBox status code on failure,
4028 * so no need to check the return value.
4029 * @param pReNative The native recompile state.
4030 * @param poff Pointer to the variable with the code buffer position.
4031 * This will be updated if we need to move a variable from
4032 * register to stack in order to satisfy the request.
4033 * @param fRegMask Mask of acceptable registers.
4034 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4035 * registers (@c true, default) or the other way around
4036 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4037 */
4038DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4039 bool fPreferVolatile /*= true*/)
4040{
4041 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4042 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4043
4044 /*
4045 * Try find a completely unused register, preferably a call-volatile one.
4046 */
4047 uint8_t idxReg;
4048 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4049 & ~pReNative->Core.bmHstRegsWithGstShadow
4050 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4051 & fRegMask;
4052 if (fRegs)
4053 {
4054 if (fPreferVolatile)
4055 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4056 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4057 else
4058 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4059 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4060 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4061 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4062 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4063 }
4064 else
4065 {
4066 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4067 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4068 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4069 }
4070 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4071}
4072
4073
4074/**
4075 * Allocates a temporary register for loading an immediate value into.
4076 *
4077 * This will emit code to load the immediate, unless there happens to be an
4078 * unused register with the value already loaded.
4079 *
4080 * The caller will not modify the returned register, it must be considered
4081 * read-only. Free using iemNativeRegFreeTmpImm.
4082 *
4083 * @returns The host register number; throws VBox status code on failure, so no
4084 * need to check the return value.
4085 * @param pReNative The native recompile state.
4086 * @param poff Pointer to the variable with the code buffer position.
4087 * @param uImm The immediate value that the register must hold upon
4088 * return.
4089 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4090 * registers (@c true, default) or the other way around
4091 * (@c false).
4092 *
4093 * @note Reusing immediate values has not been implemented yet.
4094 */
4095DECL_HIDDEN_THROW(uint8_t)
4096iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4097{
4098 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4099 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4100 return idxReg;
4101}
4102
4103
4104/**
4105 * Allocates a temporary host general purpose register for keeping a guest
4106 * register value.
4107 *
4108 * Since we may already have a register holding the guest register value,
4109 * code will be emitted to do the loading if that's not the case. Code may also
4110 * be emitted if we have to free up a register to satisfy the request.
4111 *
4112 * @returns The host register number; throws VBox status code on failure, so no
4113 * need to check the return value.
4114 * @param pReNative The native recompile state.
4115 * @param poff Pointer to the variable with the code buffer
4116 * position. This will be updated if we need to move a
4117 * variable from register to stack in order to satisfy
4118 * the request.
4119 * @param enmGstReg The guest register that is to be updated.
4120 * @param enmIntendedUse How the caller will be using the host register.
4121 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4122 * register is okay (default). The ASSUMPTION here is
4123 * that the caller has already flushed all volatile
4124 * registers, so this is only applied if we allocate a
4125 * new register.
4126 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4127 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4128 */
4129DECL_HIDDEN_THROW(uint8_t)
4130iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4131 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4132 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4133{
4134 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4135#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4136 AssertMsg( fSkipLivenessAssert
4137 || pReNative->idxCurCall == 0
4138 || enmGstReg == kIemNativeGstReg_Pc
4139 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4140 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4141 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4142 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4143 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4144 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4145#endif
4146 RT_NOREF(fSkipLivenessAssert);
4147#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4148 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4149#endif
4150 uint32_t const fRegMask = !fNoVolatileRegs
4151 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4152 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4153
4154 /*
4155 * First check if the guest register value is already in a host register.
4156 */
4157 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4158 {
4159 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4160 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4161 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4162 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4163
4164 /* It's not supposed to be allocated... */
4165 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4166 {
4167 /*
4168 * If the register will trash the guest shadow copy, try find a
4169 * completely unused register we can use instead. If that fails,
4170 * we need to disassociate the host reg from the guest reg.
4171 */
4172 /** @todo would be nice to know if preserving the register is in any way helpful. */
4173 /* If the purpose is calculations, try duplicate the register value as
4174 we'll be clobbering the shadow. */
4175 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4176 && ( ~pReNative->Core.bmHstRegs
4177 & ~pReNative->Core.bmHstRegsWithGstShadow
4178 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4179 {
4180 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4181
4182 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4183
4184 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4185 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4186 g_apszIemNativeHstRegNames[idxRegNew]));
4187 idxReg = idxRegNew;
4188 }
4189 /* If the current register matches the restrictions, go ahead and allocate
4190 it for the caller. */
4191 else if (fRegMask & RT_BIT_32(idxReg))
4192 {
4193 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4194 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4195 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4196 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4197 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4198 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4199 else
4200 {
4201 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4202 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4203 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4204 }
4205 }
4206 /* Otherwise, allocate a register that satisfies the caller and transfer
4207 the shadowing if compatible with the intended use. (This basically
4208 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4209 else
4210 {
4211 Assert(fNoVolatileRegs);
4212 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4213 !fNoVolatileRegs
4214 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4215 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4216 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4217 {
4218 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4219 Log12(("iemNativeRegAllocTmpForGuestReg: Transfering %s to %s for guest %s %s\n",
4220 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4221 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4222 }
4223 else
4224 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4225 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4226 g_apszIemNativeHstRegNames[idxRegNew]));
4227 idxReg = idxRegNew;
4228 }
4229 }
4230 else
4231 {
4232 /*
4233 * Oops. Shadowed guest register already allocated!
4234 *
4235 * Allocate a new register, copy the value and, if updating, the
4236 * guest shadow copy assignment to the new register.
4237 */
4238 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4239 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4240 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4241 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4242
4243 /** @todo share register for readonly access. */
4244 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4245 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4246
4247 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4248 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4249
4250 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4251 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4252 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4253 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4254 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4255 else
4256 {
4257 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4258 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4259 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4260 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4261 }
4262 idxReg = idxRegNew;
4263 }
4264 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4265
4266#ifdef VBOX_STRICT
4267 /* Strict builds: Check that the value is correct. */
4268 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4269#endif
4270
4271 return idxReg;
4272 }
4273
4274 /*
4275 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4276 */
4277 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4278
4279 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4280 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4281
4282 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4283 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4284 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4285 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4286
4287 return idxRegNew;
4288}
4289
4290
4291/**
4292 * Allocates a temporary host general purpose register that already holds the
4293 * given guest register value.
4294 *
4295 * The use case for this function is places where the shadowing state cannot be
4296 * modified due to branching and such. This will fail if we don't have a
4297 * current shadow copy handy or if it's incompatible. The only code that will
4298 * be emitted here is value checking code in strict builds.
4299 *
4300 * The intended use can only be readonly!
4301 *
4302 * @returns The host register number, UINT8_MAX if not present.
4303 * @param pReNative The native recompile state.
4304 * @param poff Pointer to the instruction buffer offset.
4305 * Will be updated in strict builds if a register is
4306 * found.
4307 * @param enmGstReg The guest register that is to be read.
4308 * @note In strict builds, this may throw instruction buffer growth failures.
4309 * Non-strict builds will not throw anything.
4310 * @sa iemNativeRegAllocTmpForGuestReg
4311 */
4312DECL_HIDDEN_THROW(uint8_t)
4313iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4314{
4315 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4316#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4317 AssertMsg( pReNative->idxCurCall == 0
4318 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4319 || enmGstReg == kIemNativeGstReg_Pc,
4320 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4321#endif
4322
4323 /*
4324 * First check if the guest register value is already in a host register.
4325 */
4326 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4327 {
4328 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4329 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4330 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4331 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4332
4333 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4334 {
4335 /*
4336 * We only do readonly use here, so easy compared to the other
4337 * variant of this code.
4338 */
4339 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4340 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4341 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4342 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4343 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4344
4345#ifdef VBOX_STRICT
4346 /* Strict builds: Check that the value is correct. */
4347 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4348#else
4349 RT_NOREF(poff);
4350#endif
4351 return idxReg;
4352 }
4353 }
4354
4355 return UINT8_MAX;
4356}
4357
4358
4359/**
4360 * Allocates argument registers for a function call.
4361 *
4362 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4363 * need to check the return value.
4364 * @param pReNative The native recompile state.
4365 * @param off The current code buffer offset.
4366 * @param cArgs The number of arguments the function call takes.
4367 */
4368DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4369{
4370 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4371 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4372 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4373 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4374
4375 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4376 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4377 else if (cArgs == 0)
4378 return off;
4379
4380 /*
4381 * Do we get lucky and all registers are free and not shadowing anything?
4382 */
4383 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4384 for (uint32_t i = 0; i < cArgs; i++)
4385 {
4386 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4387 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4388 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4389 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4390 }
4391 /*
4392 * Okay, not lucky so we have to free up the registers.
4393 */
4394 else
4395 for (uint32_t i = 0; i < cArgs; i++)
4396 {
4397 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4398 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4399 {
4400 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4401 {
4402 case kIemNativeWhat_Var:
4403 {
4404 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4406 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4407 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4408 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4409
4410 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4411 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4412 else
4413 {
4414 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4415 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4416 }
4417 break;
4418 }
4419
4420 case kIemNativeWhat_Tmp:
4421 case kIemNativeWhat_Arg:
4422 case kIemNativeWhat_rc:
4423 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4424 default:
4425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4426 }
4427
4428 }
4429 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4430 {
4431 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4432 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4433 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4434 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4435 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4436 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4437 }
4438 else
4439 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4440 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4441 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4442 }
4443 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4444 return off;
4445}
4446
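/*
 * Usage sketch (illustrative only, not compiled): reserving the host argument
 * registers before emitting a three-argument helper call.  Loading the actual
 * argument values into the IEMNATIVE_CALL_ARGx_GREG registers and emitting the
 * call itself is left out.
 */
#if 0
    off = iemNativeRegAllocArgs(pReNative, off, 3 /*cArgs*/);
    /* ... emit code loading the three argument registers, then the call ... */
#endif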
4447
4448DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4449
4450
4451#if 0
4452/**
4453 * Frees a register assignment of any type.
4454 *
4455 * @param pReNative The native recompile state.
4456 * @param idxHstReg The register to free.
4457 *
4458 * @note Does not update variables.
4459 */
4460DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4461{
4462 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4463 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4464 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4465 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4466 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4467 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4468 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4469 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4470 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4471 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4472 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4473 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4474 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4475 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4476
4477 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4478 /* no flushing, right:
4479 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4480 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4481 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4482 */
4483}
4484#endif
4485
4486
4487/**
4488 * Frees a temporary register.
4489 *
4490 * Any shadow copies of guest registers assigned to the host register will not
4491 * be flushed by this operation.
4492 */
4493DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4494{
4495 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4496 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4497 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4498 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4499 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4500}
4501
4502
4503/**
4504 * Frees a temporary immediate register.
4505 *
4506 * It is assumed that the caller has not modified the register, so it still holds
4507 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4508 */
4509DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4510{
4511 iemNativeRegFreeTmp(pReNative, idxHstReg);
4512}
4513
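/*
 * Usage sketch (illustrative only, not compiled): immediate temporaries are
 * released with the matching free routine once the emitted code is done with
 * them.  The iemNativeRegAllocTmpImm() signature shown is assumed to follow
 * the (pReNative, &off, uImm) pattern used by the other allocators here.
 */
#if 0
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xfff));
    /* ... use idxRegImm as a read-only constant in the emitted code ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm);
#endif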
4514
4515/**
4516 * Frees a register assigned to a variable.
4517 *
4518 * The register will be disassociated from the variable.
4519 */
4520DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4521{
4522 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4523 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4524 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4525 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4526 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4527
4528 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4529 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4530 if (!fFlushShadows)
4531 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4532 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4533 else
4534 {
4535 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4536 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4537 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4538 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4539 uint64_t fGstRegShadows = fGstRegShadowsOld;
4540 while (fGstRegShadows)
4541 {
4542 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4543 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4544
4545 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4546 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4547 }
4548 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4549 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4550 }
4551}
4552
4553
4554/**
4555 * Called right before emitting a call instruction to move anything important
4556 * out of call-volatile registers, free and flush the call-volatile registers,
4557 * optionally freeing argument variables.
4558 *
4559 * @returns New code buffer offset; throws VBox status code on failure.
4560 * @param pReNative The native recompile state.
4561 * @param off The code buffer offset.
4562 * @param cArgs The number of arguments the function call takes.
4563 * It is presumed that the host register part of these has
4564 * been allocated as such already and won't need moving,
4565 * just freeing.
4566 * @param fKeepVars Mask of variables that should keep their register
4567 * assignments. Caller must take care to handle these.
4568 */
4569DECL_HIDDEN_THROW(uint32_t)
4570iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4571{
4572 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4573
4574 /* fKeepVars will reduce this mask. */
4575 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4576
4577 /*
4578 * Move anything important out of volatile registers.
4579 */
4580 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4581 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4582 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4583#ifdef IEMNATIVE_REG_FIXED_TMP0
4584 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4585#endif
4586#ifdef IEMNATIVE_REG_FIXED_TMP1
4587 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4588#endif
4589#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4590 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4591#endif
4592 & ~g_afIemNativeCallRegs[cArgs];
4593
4594 fRegsToMove &= pReNative->Core.bmHstRegs;
4595 if (!fRegsToMove)
4596 { /* likely */ }
4597 else
4598 {
4599 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4600 while (fRegsToMove != 0)
4601 {
4602 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4603 fRegsToMove &= ~RT_BIT_32(idxReg);
4604
4605 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4606 {
4607 case kIemNativeWhat_Var:
4608 {
4609 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4610 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4611 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4612 Assert(pVar->idxReg == idxReg);
4613 if (!(RT_BIT_32(idxVar) & fKeepVars))
4614 {
4615 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4616 idxVar, pVar->enmKind, pVar->idxReg));
4617 if (pVar->enmKind != kIemNativeVarKind_Stack)
4618 pVar->idxReg = UINT8_MAX;
4619 else
4620 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4621 }
4622 else
4623 fRegsToFree &= ~RT_BIT_32(idxReg);
4624 continue;
4625 }
4626
4627 case kIemNativeWhat_Arg:
4628 AssertMsgFailed(("What?!?: %u\n", idxReg));
4629 continue;
4630
4631 case kIemNativeWhat_rc:
4632 case kIemNativeWhat_Tmp:
4633 AssertMsgFailed(("Missing free: %u\n", idxReg));
4634 continue;
4635
4636 case kIemNativeWhat_FixedTmp:
4637 case kIemNativeWhat_pVCpuFixed:
4638 case kIemNativeWhat_pCtxFixed:
4639 case kIemNativeWhat_PcShadow:
4640 case kIemNativeWhat_FixedReserved:
4641 case kIemNativeWhat_Invalid:
4642 case kIemNativeWhat_End:
4643 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4644 }
4645 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4646 }
4647 }
4648
4649 /*
4650 * Do the actual freeing.
4651 */
4652 if (pReNative->Core.bmHstRegs & fRegsToFree)
4653 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4654 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4655 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4656
4657 /* If there are guest register shadows in any call-volatile register, we
4658 have to clear the corresponding guest register masks for each register. */
4659 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4660 if (fHstRegsWithGstShadow)
4661 {
4662 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4663 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4664 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4665 do
4666 {
4667 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4668 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4669
4670 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4671 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4672 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4673 } while (fHstRegsWithGstShadow != 0);
4674 }
4675
4676 return off;
4677}
4678
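/*
 * Usage sketch (illustrative only, not compiled): bracketing a helper call.
 * The two arguments are presumed to already sit in their argument registers;
 * everything else living in call-volatile registers gets moved or flushed.
 */
#if 0
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/, 0 /*fKeepVars*/);
    /* ... emit the actual call instruction here ... */
#endif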
4679
4680/**
4681 * Flushes a set of guest register shadow copies.
4682 *
4683 * This is usually done after calling a threaded function or a C-implementation
4684 * of an instruction.
4685 *
4686 * @param pReNative The native recompile state.
4687 * @param fGstRegs Set of guest registers to flush.
4688 */
4689DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4690{
4691 /*
4692 * Reduce the mask by what's currently shadowed
4693 */
4694 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4695 fGstRegs &= bmGstRegShadowsOld;
4696 if (fGstRegs)
4697 {
4698 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4699 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4700 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4701 if (bmGstRegShadowsNew)
4702 {
4703 /*
4704 * Partial.
4705 */
4706 do
4707 {
4708 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4709 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4710 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4711 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4712 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4713
4714 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4715 fGstRegs &= ~fInThisHstReg;
4716 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4717 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4718 if (!fGstRegShadowsNew)
4719 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4720 } while (fGstRegs != 0);
4721 }
4722 else
4723 {
4724 /*
4725 * Clear all.
4726 */
4727 do
4728 {
4729 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4730 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4731 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4732 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4733 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4734
4735 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4736 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4737 } while (fGstRegs != 0);
4738 pReNative->Core.bmHstRegsWithGstShadow = 0;
4739 }
4740 }
4741}
4742
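/*
 * Usage sketch (illustrative only, not compiled): after a call that may have
 * modified the whole guest context, drop every guest register shadow.  The
 * mask is reduced internally to what is actually shadowed, so UINT64_MAX is
 * a convenient "flush everything" value.
 */
#if 0
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
#endif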
4743
4744/**
4745 * Flushes guest register shadow copies held by a set of host registers.
4746 *
4747 * This is used with the TLB lookup code for ensuring that we don't carry on
4748 * with any guest shadows in volatile registers, as these will get corrupted by
4749 * a TLB miss.
4750 *
4751 * @param pReNative The native recompile state.
4752 * @param fHstRegs Set of host registers to flush guest shadows for.
4753 */
4754DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4755{
4756 /*
4757 * Reduce the mask by what's currently shadowed.
4758 */
4759 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4760 fHstRegs &= bmHstRegsWithGstShadowOld;
4761 if (fHstRegs)
4762 {
4763 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4764 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4765 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4766 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4767 if (bmHstRegsWithGstShadowNew)
4768 {
4769 /*
4770 * Partial (likely).
4771 */
4772 uint64_t fGstShadows = 0;
4773 do
4774 {
4775 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4776 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4777 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4778 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4779
4780 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4781 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4782 fHstRegs &= ~RT_BIT_32(idxHstReg);
4783 } while (fHstRegs != 0);
4784 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4785 }
4786 else
4787 {
4788 /*
4789 * Clear all.
4790 */
4791 do
4792 {
4793 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4794 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4795 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4796 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4797
4798 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4799 fHstRegs &= ~RT_BIT_32(idxHstReg);
4800 } while (fHstRegs != 0);
4801 pReNative->Core.bmGstRegShadows = 0;
4802 }
4803 }
4804}
4805
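/*
 * Usage sketch (illustrative only, not compiled): before a TLB lookup, make
 * sure no guest shadows live in call-volatile registers, since a TLB miss
 * will clobber those.
 */
#if 0
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif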
4806
4807/**
4808 * Restores guest shadow copies in volatile registers.
4809 *
4810 * This is used after calling a helper function (think TLB miss) to restore the
4811 * register state of volatile registers.
4812 *
4813 * @param pReNative The native recompile state.
4814 * @param off The code buffer offset.
4815 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4816 * be active (allocated) w/o asserting. Hack.
4817 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4818 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4819 */
4820DECL_HIDDEN_THROW(uint32_t)
4821iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4822{
4823 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4824 if (fHstRegs)
4825 {
4826 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4827 do
4828 {
4829 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4830
4831 /* It's not fatal if a register is active holding a variable that is
4832 shadowing a guest register, ASSUMING all pending guest register
4833 writes were flushed prior to the helper call. However, we'll be
4834 emitting duplicate restores, so it wastes code space. */
4835 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4836 RT_NOREF(fHstRegsActiveShadows);
4837
4838 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4839 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4840 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4841 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4842
4843 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4844 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4845
4846 fHstRegs &= ~RT_BIT_32(idxHstReg);
4847 } while (fHstRegs != 0);
4848 }
4849 return off;
4850}
4851
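/*
 * Usage sketch (illustrative only, not compiled): after the TLB-miss helper
 * returns, reload the guest values whose shadows were left associated with
 * call-volatile registers.
 */
#if 0
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif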
4852
4853
4854
4855/*********************************************************************************************************************************
4856* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4857*********************************************************************************************************************************/
4858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4859
4860/**
4861 * Info about shadowed guest SIMD register values.
4862 * @see IEMNATIVEGSTSIMDREG
4863 */
4864static struct
4865{
4866 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4867 uint32_t offXmm;
4868 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4869 uint32_t offYmm;
4870 /** Name (for logging). */
4871 const char *pszName;
4872} const g_aGstSimdShadowInfo[] =
4873{
4874#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4875 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4876 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4877 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4878 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4879 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4880 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4881 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4882 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4883 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4884 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4885 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4886 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4887 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4888 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4889 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4890 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4891 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4892#undef CPUMCTX_OFF_AND_SIZE
4893};
4894AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4895
4896
4897#ifdef LOG_ENABLED
4898/** Host CPU SIMD register names. */
4899DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4900{
4901#ifdef RT_ARCH_AMD64
4902 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4903#elif defined(RT_ARCH_ARM64)
4904 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4905 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4906#else
4907# error "port me"
4908#endif
4909};
4910#endif
4911
4912
4913/**
4914 * Frees a temporary SIMD register.
4915 *
4916 * Any shadow copies of guest registers assigned to the host register will not
4917 * be flushed by this operation.
4918 */
4919DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4920{
4921 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4922 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4923 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4924 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4925 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4926}
4927
4928
4929/**
4930 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
4931 *
4932 * @returns New code buffer offset.
4933 * @param pReNative The native recompile state.
4934 * @param off Current code buffer position.
4935 * @param enmGstSimdReg The guest SIMD register to flush.
4936 */
4937DECL_HIDDEN_THROW(uint32_t)
4938iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4939{
4940 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4941
4942 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4943 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4944 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4945 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4946
4947 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4948 {
4949 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4950 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4951 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4952 }
4953
4954 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4955 {
4956 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4957 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4958 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4959 }
4960
4961 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4962 return off;
4963}
4964
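/*
 * Usage sketch (illustrative only, not compiled): write back the guest ymm0
 * shadow if it is dirty, mirroring how iemNativeRegFlushPendingWritesSlow()
 * below guards this call with the dirty check.
 */
#if 0
    if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, 0 /*ymm0*/))
        off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
#endif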
4965
4966/**
4967 * Locate a register, possibly freeing one up.
4968 *
4969 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4970 * failed.
4971 *
4972 * @returns Host register number on success. Returns UINT8_MAX if no registers
4973 * found, the caller is supposed to deal with this and raise an
4974 * allocation-type-specific status code (if desired).
4975 *
4976 * @throws VBox status code if we run into trouble spilling a variable or
4977 * recording debug info. Does NOT throw anything if we're out of
4978 * registers, though.
4979 */
4980static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4981 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4982{
4983 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4984 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4985 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4986
4987 /*
4988 * Try a freed register that's shadowing a guest register.
4989 */
4990 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4991 if (fRegs)
4992 {
4993 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4994
4995#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4996 /*
4997 * When we have liveness information, we use it to kick out all shadowed
4998 * guest registers that will not be needed any more in this TB. If we're
4999 * lucky, this may prevent us from ending up here again.
5000 *
5001 * Note! We must consider the previous entry here so we don't free
5002 * anything that the current threaded function requires (current
5003 * entry is produced by the next threaded function).
5004 */
5005 uint32_t const idxCurCall = pReNative->idxCurCall;
5006 if (idxCurCall > 0)
5007 {
5008 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5009
5010# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5011 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5012 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5013 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5014#else
5015 /* Construct a mask of the registers not in the read or write state.
5016 Note! We could skip writes, if they aren't from us, as this is just
5017 a hack to prevent trashing registers that have just been written
5018 or will be written when we retire the current instruction. */
5019 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5020 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5021 & IEMLIVENESSBIT_MASK;
5022#endif
5023 /* If it matches any shadowed registers. */
5024 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5025 {
5026 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5027 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5028 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5029
5030 /* See if we've got any unshadowed registers we can return now. */
5031 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5032 if (fUnshadowedRegs)
5033 {
5034 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5035 return (fPreferVolatile
5036 ? ASMBitFirstSetU32(fUnshadowedRegs)
5037 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5038 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5039 - 1;
5040 }
5041 }
5042 }
5043#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5044
5045 unsigned const idxReg = (fPreferVolatile
5046 ? ASMBitFirstSetU32(fRegs)
5047 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5048 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5049 - 1;
5050
5051 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5052 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5053 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5054 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5055 Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5056
5057 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5058 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5059 uint32_t idxGstSimdReg = 0;
5060 do
5061 {
5062 if (fGstRegShadows & 0x1)
5063 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5064 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5065 idxGstSimdReg++;
5066 fGstRegShadows >>= 1;
5067 } while (fGstRegShadows);
5068
5069 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5070 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5071 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5072 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5073 return idxReg;
5074 }
5075
5076 /*
5077 * Try free up a variable that's in a register.
5078 *
5079 * We do two rounds here, first evacuating variables we don't need to be
5080 * saved on the stack, then in the second round move things to the stack.
5081 */
5082 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5083 AssertReleaseFailed(); /** @todo No variable support right now. */
5084#if 0
5085 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5086 {
5087 uint32_t fVars = pReNative->Core.bmSimdVars;
5088 while (fVars)
5089 {
5090 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5091 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5092 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5093 && (RT_BIT_32(idxReg) & fRegMask)
5094 && ( iLoop == 0
5095 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5096 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5097 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5098 {
5099 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5100 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5101 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5102 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5103 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5104 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5105
5106 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5107 {
5108 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5109 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5110 }
5111
5112 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5113 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5114
5115 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5116 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5117 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5118 return idxReg;
5119 }
5120 fVars &= ~RT_BIT_32(idxVar);
5121 }
5122 }
5123#endif
5124
5125 AssertFailed();
5126 return UINT8_MAX;
5127}
5128
5129
5130/**
5131 * Flushes a set of guest register shadow copies.
5132 *
5133 * This is usually done after calling a threaded function or a C-implementation
5134 * of an instruction.
5135 *
5136 * @param pReNative The native recompile state.
5137 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5138 */
5139DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5140{
5141 /*
5142 * Reduce the mask by what's currently shadowed
5143 */
5144 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5145 fGstSimdRegs &= bmGstSimdRegShadows;
5146 if (fGstSimdRegs)
5147 {
5148 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5149 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5150 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5151 if (bmGstSimdRegShadowsNew)
5152 {
5153 /*
5154 * Partial.
5155 */
5156 do
5157 {
5158 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5159 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5160 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5161 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5162 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5163 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5164
5165 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5166 fGstSimdRegs &= ~fInThisHstReg;
5167 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5168 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5169 if (!fGstRegShadowsNew)
5170 {
5171 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5172 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5173 }
5174 } while (fGstSimdRegs != 0);
5175 }
5176 else
5177 {
5178 /*
5179 * Clear all.
5180 */
5181 do
5182 {
5183 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5184 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5185 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5186 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5187 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5188 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5189
5190 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5191 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5192 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5193 } while (fGstSimdRegs != 0);
5194 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5195 }
5196 }
5197}
5198
5199
5200/**
5201 * Allocates a temporary host SIMD register.
5202 *
5203 * This may emit code to save register content onto the stack in order to free
5204 * up a register.
5205 *
5206 * @returns The host register number; throws VBox status code on failure,
5207 * so no need to check the return value.
5208 * @param pReNative The native recompile state.
5209 * @param poff Pointer to the variable with the code buffer position.
5210 * This will be updated if we need to move a variable from
5211 * register to stack in order to satisfy the request.
5212 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5213 * registers (@c true, default) or the other way around
5214 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5215 */
5216DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5217{
5218 /*
5219 * Try find a completely unused register, preferably a call-volatile one.
5220 */
5221 uint8_t idxSimdReg;
5222 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5223 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5224 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5225 if (fRegs)
5226 {
5227 if (fPreferVolatile)
5228 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5229 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5230 else
5231 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5232 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5233 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5234 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5235 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5236 }
5237 else
5238 {
5239 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5240 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5241 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5242 }
5243
5244 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5245 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5246}
5247
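/*
 * Usage sketch (illustrative only, not compiled): a scratch SIMD register for
 * an intermediate result, released again once the emitted code is done with
 * it.  Relies on the default fPreferVolatile = true declared in the header.
 */
#if 0
    uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
    /* ... emit SIMD code using idxSimdRegTmp as scratch ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
#endif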
5248
5249/**
5250 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5251 * registers.
5252 *
5253 * @returns The host register number; throws VBox status code on failure,
5254 * so no need to check the return value.
5255 * @param pReNative The native recompile state.
5256 * @param poff Pointer to the variable with the code buffer position.
5257 * This will be updated if we need to move a variable from
5258 * register to stack in order to satisfy the request.
5259 * @param fRegMask Mask of acceptable registers.
5260 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5261 * registers (@c true, default) or the other way around
5262 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5263 */
5264DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5265 bool fPreferVolatile /*= true*/)
5266{
5267 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5268 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5269
5270 /*
5271 * Try find a completely unused register, preferably a call-volatile one.
5272 */
5273 uint8_t idxSimdReg;
5274 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5275 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5276 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5277 & fRegMask;
5278 if (fRegs)
5279 {
5280 if (fPreferVolatile)
5281 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5282 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5283 else
5284 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5285 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5286 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5287 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5288 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5289 }
5290 else
5291 {
5292 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5293 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5294 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5295 }
5296
5297 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5298 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5299}
5300
5301
5302/**
5303 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5304 *
5305 * @param pReNative The native recompile state.
5306 * @param idxHstSimdReg The host SIMD register to update the state for.
5307 * @param enmLoadSz The load size to set.
5308 */
5309DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5310 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5311{
5312 /* Everything valid already? -> nothing to do. */
5313 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5314 return;
5315
5316 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5317 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5318 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5319 {
5320 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5321 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5322 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5323 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5324 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5325 }
5326}
5327
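/*
 * Illustrative only (not compiled): the load-size tracking above merges the
 * two 128-bit halves.  Assuming the host register starts out with nothing
 * valid loaded (Invalid), loading the low and then the high half yields the
 * 256-bit state.
 */
#if 0
    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
    iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
    /* pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded is now kIemNativeGstSimdRegLdStSz_256. */
#endif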
5328
5329static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5330 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5331{
5332 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5333 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5334 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5335 {
5336# ifdef RT_ARCH_ARM64
5337 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5338 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5339# endif
5340
5341 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5342 {
5343 switch (enmLoadSzDst)
5344 {
5345 case kIemNativeGstSimdRegLdStSz_256:
5346 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5347 break;
5348 case kIemNativeGstSimdRegLdStSz_Low128:
5349 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5350 break;
5351 case kIemNativeGstSimdRegLdStSz_High128:
5352 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5353 break;
5354 default:
5355 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5356 }
5357
5358 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5359 }
5360 }
5361 else
5362 {
5363 /* Complicated stuff where the source is currently missing something, later. */
5364 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5365 }
5366
5367 return off;
5368}
5369
5370
5371/**
5372 * Allocates a temporary host SIMD register for keeping a guest
5373 * SIMD register value.
5374 *
5375 * Since we may already have a register holding the guest register value,
5376 * code will be emitted to do the loading if that's not the case. Code may also
5377 * be emitted if we have to free up a register to satisfy the request.
5378 *
5379 * @returns The host register number; throws VBox status code on failure, so no
5380 * need to check the return value.
5381 * @param pReNative The native recompile state.
5382 * @param poff Pointer to the variable with the code buffer
5383 * position. This will be updated if we need to move a
5384 * variable from register to stack in order to satisfy
5385 * the request.
5386 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5387 * @param enmIntendedUse How the caller will be using the host register.
5388 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5389 * register is okay (default). The ASSUMPTION here is
5390 * that the caller has already flushed all volatile
5391 * registers, so this is only applied if we allocate a
5392 * new register.
5393 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5394 */
5395DECL_HIDDEN_THROW(uint8_t)
5396iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5397 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5398 bool fNoVolatileRegs /*= false*/)
5399{
5400 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5401#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5402 AssertMsg( pReNative->idxCurCall == 0
5403 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5404 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5405 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5406 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5407 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5408 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5409#endif
5410#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5411 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5412#endif
5413 uint32_t const fRegMask = !fNoVolatileRegs
5414 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5415 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5416
5417 /*
5418 * First check if the guest register value is already in a host register.
5419 */
5420 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5421 {
5422 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5423 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5424 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5425 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5426
5427 /* It's not supposed to be allocated... */
5428 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5429 {
5430 /*
5431 * If the register will trash the guest shadow copy, try find a
5432 * completely unused register we can use instead. If that fails,
5433 * we need to disassociate the host reg from the guest reg.
5434 */
5435 /** @todo would be nice to know if preserving the register is in any way helpful. */
5436 /* If the purpose is calculations, try to duplicate the register value as
5437 we'll be clobbering the shadow. */
5438 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5439 && ( ~pReNative->Core.bmHstSimdRegs
5440 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5441 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5442 {
5443 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5444
5445 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5446
5447 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5448 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5449 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5450 idxSimdReg = idxRegNew;
5451 }
5452 /* If the current register matches the restrictions, go ahead and allocate
5453 it for the caller. */
5454 else if (fRegMask & RT_BIT_32(idxSimdReg))
5455 {
5456 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5457 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5458 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5459 {
5460 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5461 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5462 else
5463 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5464 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5465 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5466 }
5467 else
5468 {
5469 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5470 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5471 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5472 }
5473 }
5474 /* Otherwise, allocate a register that satisfies the caller and transfer
5475 the shadowing if compatible with the intended use. (This basically
5476 means the call wants a non-volatile register (RSP push/pop scenario).) */
5477 else
5478 {
5479 Assert(fNoVolatileRegs);
5480 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5481 !fNoVolatileRegs
5482 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5483 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5484 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5485 {
5486 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5487 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n",
5488 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5489 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5490 }
5491 else
5492 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5493 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5494 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5495 idxSimdReg = idxRegNew;
5496 }
5497 }
5498 else
5499 {
5500 /*
5501 * Oops. Shadowed guest register already allocated!
5502 *
5503 * Allocate a new register, copy the value and, if updating, the
5504 * guest shadow copy assignment to the new register.
5505 */
5506 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5507 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5508 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5509 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5510
5511 /** @todo share register for readonly access. */
5512 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5513 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5514
5515 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5516 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5517 else
5518 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5519
5520 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5521 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5522 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5523 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5524 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5525 else
5526 {
5527 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5528 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5529 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5530 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5531 }
5532 idxSimdReg = idxRegNew;
5533 }
5534 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5535
5536#ifdef VBOX_STRICT
5537 /* Strict builds: Check that the value is correct. */
5538 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5539 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5540#endif
5541
5542 return idxSimdReg;
5543 }
5544
5545 /*
5546 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5547 */
5548 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5549
5550 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5551 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5552 else
5553 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5554
5555 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5556 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5557
5558 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5559 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5560
5561 return idxRegNew;
5562}
5563
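/*
 * Usage sketch (illustrative only, not compiled): fetch the low 128 bits of
 * guest ymm1 read-only into a host SIMD register and release the temporary
 * once the emitted code has consumed it.
 */
#if 0
    uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(1),
                                                                          kIemNativeGstSimdRegLdStSz_Low128,
                                                                          kIemNativeGstRegUse_ReadOnly);
    /* ... emit code reading host SIMD register idxSimdRegSrc ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
#endif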
5564#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5565
5566
5567
5568/*********************************************************************************************************************************
5569* Code emitters for flushing pending guest register writes and sanity checks *
5570*********************************************************************************************************************************/
5571
5572#ifdef VBOX_STRICT
5573/**
5574 * Does internal register allocator sanity checks.
5575 */
5576DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5577{
5578 /*
5579 * Iterate host registers building a guest shadowing set.
5580 */
5581 uint64_t bmGstRegShadows = 0;
5582 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5583 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5584 while (bmHstRegsWithGstShadow)
5585 {
5586 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5587 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5588 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5589
5590 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5591 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5592 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5593 bmGstRegShadows |= fThisGstRegShadows;
5594 while (fThisGstRegShadows)
5595 {
5596 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5597 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5598 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5599 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5600 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5601 }
5602 }
5603 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5604 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5605 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5606
5607 /*
5608 * Now the other way around, checking the guest to host index array.
5609 */
5610 bmHstRegsWithGstShadow = 0;
5611 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5612 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5613 while (bmGstRegShadows)
5614 {
5615 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5616 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5617 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5618
5619 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5620 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5621 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5622 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5623 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5624 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5625 }
5626 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5627 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5628 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5629}
5630#endif /* VBOX_STRICT */
5631
5632
5633/**
5634 * Flushes any delayed guest register writes.
5635 *
5636 * This must be called prior to calling CImpl functions and any helpers that use
5637 * the guest state (like raising exceptions) and such.
5638 *
5639 * Currently only RIP updates (IEMNATIVE_WITH_DELAYED_PC_UPDATING) and dirty SIMD
5640 * register shadows are subject to delaying; both get flushed here.
5641 */
5642DECL_HIDDEN_THROW(uint32_t)
5643iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5644{
5645#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5646 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5647 off = iemNativeEmitPcWriteback(pReNative, off);
5648#else
5649 RT_NOREF(pReNative, fGstShwExcept);
5650#endif
5651
5652#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5653 /** @todo r=bird: There must be a quicker way to check if anything needs
5654 * doing before calling the SIMD function to do the flushing. */
5655 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
5656 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5657 {
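        /* Invariant: a guest SIMD register can only be dirty while a host register is shadowing it. */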
5658 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5659 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5660
5661 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5662 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5663
5664 if ( fFlushShadows
5665 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5666 {
5667 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5668
5669 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5670 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5671 }
5672 }
5673#else
5674 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5675#endif
5676
5677 return off;
5678}
5679
5680
5681#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5682/**
5683 * Emits code to update the guest RIP value by adding the current offset (Core.offPc) accumulated since the last RIP update.
5684 */
5685DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5686{
5687 Assert(pReNative->Core.offPc);
5688# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5689 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5690 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5691# endif
5692
5693# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5694 /* Allocate a temporary PC register. */
5695 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5696
5697 /* Perform the addition and store the result. */
5698 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5699 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5700
5701 /* Free but don't flush the PC register. */
5702 iemNativeRegFreeTmp(pReNative, idxPcReg);
5703# else
5704 /* Compare the shadow with the context value, they should match. */
5705 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5706 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5707# endif
5708
5709 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5710 pReNative->Core.offPc = 0;
5711 pReNative->Core.cInstrPcUpdateSkipped = 0;
5712
5713 return off;
5714}
5715#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5716
5717
5718/*********************************************************************************************************************************
5719* Code Emitters (larger snippets) *
5720*********************************************************************************************************************************/
5721
5722/**
5723 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5724 * extending to 64-bit width.
5725 *
5726 * @returns New code buffer offset on success, UINT32_MAX on failure.
5727 * @param pReNative The recompiler state.
5728 * @param off The current code buffer position.
5729 * @param idxHstReg The host register to load the guest register value into.
5730 * @param enmGstReg The guest register to load.
5731 *
5732 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5733 * that is something the caller needs to do if applicable.
5734 */
5735DECL_HIDDEN_THROW(uint32_t)
5736iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5737{
5738 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5739 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5740
5741 switch (g_aGstShadowInfo[enmGstReg].cb)
5742 {
5743 case sizeof(uint64_t):
5744 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5745 case sizeof(uint32_t):
5746 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5747 case sizeof(uint16_t):
5748 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5749#if 0 /* not present in the table. */
5750 case sizeof(uint8_t):
5751 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5752#endif
5753 default:
5754 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5755 }
5756}
5757
5758
5759#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5760/**
5761 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5762 *
5763 * @returns New code buffer offset on success, UINT32_MAX on failure.
5764 * @param pReNative The recompiler state.
5765 * @param off The current code buffer position.
5766 * @param idxHstSimdReg The host register to load the guest register value into.
5767 * @param enmGstSimdReg The guest register to load.
5768 * @param enmLoadSz The load size of the register.
5769 *
5770 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5771 * that is something the caller needs to do if applicable.
5772 */
5773DECL_HIDDEN_THROW(uint32_t)
5774iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5775 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5776{
5777 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5778
5779 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5780 switch (enmLoadSz)
5781 {
5782 case kIemNativeGstSimdRegLdStSz_256:
5783 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5784 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5785 case kIemNativeGstSimdRegLdStSz_Low128:
5786 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5787 case kIemNativeGstSimdRegLdStSz_High128:
5788 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5789 default:
5790 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5791 }
5792}
5793#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5794
5795#ifdef VBOX_STRICT
5796
5797/**
5798 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5799 *
5800 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5801 * Trashes EFLAGS on AMD64.
5802 */
5803DECL_HIDDEN_THROW(uint32_t)
5804iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5805{
5806# ifdef RT_ARCH_AMD64
5807 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5808
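    /* Strategy: rotate the upper 32 bits into the low half, test them, trap with int3 if any are set,
       then rotate back so the register value is left unchanged. */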
5809 /* rol reg64, 32 */
5810 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5811 pbCodeBuf[off++] = 0xc1;
5812 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5813 pbCodeBuf[off++] = 32;
5814
5815 /* test reg32, ffffffffh */
5816 if (idxReg >= 8)
5817 pbCodeBuf[off++] = X86_OP_REX_B;
5818 pbCodeBuf[off++] = 0xf7;
5819 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5820 pbCodeBuf[off++] = 0xff;
5821 pbCodeBuf[off++] = 0xff;
5822 pbCodeBuf[off++] = 0xff;
5823 pbCodeBuf[off++] = 0xff;
5824
5825 /* je/jz +1 */
5826 pbCodeBuf[off++] = 0x74;
5827 pbCodeBuf[off++] = 0x01;
5828
5829 /* int3 */
5830 pbCodeBuf[off++] = 0xcc;
5831
5832 /* rol reg64, 32 */
5833 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5834 pbCodeBuf[off++] = 0xc1;
5835 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5836 pbCodeBuf[off++] = 32;
5837
5838# elif defined(RT_ARCH_ARM64)
5839 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5840 /* lsr tmp0, reg64, #32 */
5841 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5842 /* cbz tmp0, +1 */
5843 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5844 /* brk #0x1100 */
5845 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5846
5847# else
5848# error "Port me!"
5849# endif
5850 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5851 return off;
5852}
5853
5854
5855/**
5856 * Emits code that checks that the content of register @a idxReg is the same
5857 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5858 * instruction if that's not the case.
5859 *
5860 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5861 * Trashes EFLAGS on AMD64.
5862 */
5863DECL_HIDDEN_THROW(uint32_t)
5864iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5865{
5866# ifdef RT_ARCH_AMD64
5867 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5868
5869 /* cmp reg, [mem] */
5870 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5871 {
5872 if (idxReg >= 8)
5873 pbCodeBuf[off++] = X86_OP_REX_R;
5874 pbCodeBuf[off++] = 0x38;
5875 }
5876 else
5877 {
5878 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5879 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5880 else
5881 {
5882 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5883 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5884 else
5885 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5886 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5887 if (idxReg >= 8)
5888 pbCodeBuf[off++] = X86_OP_REX_R;
5889 }
5890 pbCodeBuf[off++] = 0x39;
5891 }
5892 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5893
5894 /* je/jz +1 */
5895 pbCodeBuf[off++] = 0x74;
5896 pbCodeBuf[off++] = 0x01;
5897
5898 /* int3 */
5899 pbCodeBuf[off++] = 0xcc;
5900
5901 /* For values smaller than the register size, we must check that the rest
5902 of the register is all zeros. */
5903 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5904 {
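        /* The imm32 below masks the bits just above the value: 0xffffff00 for 8-bit and 0xffff0000 for
           16-bit fields; with the 64-bit operand size it is sign-extended, so bits 32 thru 63 are covered too. */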
5905 /* test reg64, imm32 */
5906 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5907 pbCodeBuf[off++] = 0xf7;
5908 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5909 pbCodeBuf[off++] = 0;
5910 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5911 pbCodeBuf[off++] = 0xff;
5912 pbCodeBuf[off++] = 0xff;
5913
5914 /* je/jz +1 */
5915 pbCodeBuf[off++] = 0x74;
5916 pbCodeBuf[off++] = 0x01;
5917
5918 /* int3 */
5919 pbCodeBuf[off++] = 0xcc;
5920 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5921 }
5922 else
5923 {
5924 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5925 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5926 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5927 }
5928
5929# elif defined(RT_ARCH_ARM64)
5930 /* mov TMP0, [gstreg] */
5931 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5932
5933 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5934 /* sub tmp0, tmp0, idxReg */
5935 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5936 /* cbz tmp0, +1 */
5937 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5938 /* brk #0x1000+enmGstReg */
5939 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
5940 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5941
5942# else
5943# error "Port me!"
5944# endif
5945 return off;
5946}
5947
5948
5949# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5950/**
5951 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
5952 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
5953 * instruction if that's not the case.
5954 *
5955 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
5956 * Trashes EFLAGS on AMD64.
5957 */
5958DECL_HIDDEN_THROW(uint32_t)
5959iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
5960 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5961{
5962 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5963 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
5964 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
5965 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5966 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
5967 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5968 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
5969 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
5970 return off;
5971
5972# ifdef RT_ARCH_AMD64
5973 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
5974
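    /* Strategy: copy the host SIMD register, pcmpeqq it against the CPUMCTX value (each 64-bit lane
       becomes all ones on a match), then pextrq each lane and break with int3 unless it reads back as all ones. */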
5975 /* movdqa vectmp0, idxSimdReg */
5976 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
5977
5978 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
5979
5980 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
5981 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5982 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
5983 pbCodeBuf[off++] = X86_OP_REX_R;
5984 pbCodeBuf[off++] = 0x0f;
5985 pbCodeBuf[off++] = 0x38;
5986 pbCodeBuf[off++] = 0x29;
5987 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5988
5989 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
5990 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5991 pbCodeBuf[off++] = X86_OP_REX_W
5992 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
5993 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
5994 pbCodeBuf[off++] = 0x0f;
5995 pbCodeBuf[off++] = 0x3a;
5996 pbCodeBuf[off++] = 0x16;
5997 pbCodeBuf[off++] = 0xeb;
5998 pbCodeBuf[off++] = 0x00;
5999
6000 /* cmp tmp0, 0xffffffffffffffff. */
6001 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6002 pbCodeBuf[off++] = 0x83;
6003 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6004 pbCodeBuf[off++] = 0xff;
6005
6006 /* je/jz +1 */
6007 pbCodeBuf[off++] = 0x74;
6008 pbCodeBuf[off++] = 0x01;
6009
6010 /* int3 */
6011 pbCodeBuf[off++] = 0xcc;
6012
6013 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6014 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6015 pbCodeBuf[off++] = X86_OP_REX_W
6016 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6017 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6018 pbCodeBuf[off++] = 0x0f;
6019 pbCodeBuf[off++] = 0x3a;
6020 pbCodeBuf[off++] = 0x16;
6021 pbCodeBuf[off++] = 0xeb;
6022 pbCodeBuf[off++] = 0x01;
6023
6024 /* cmp tmp0, 0xffffffffffffffff. */
6025 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6026 pbCodeBuf[off++] = 0x83;
6027 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6028 pbCodeBuf[off++] = 0xff;
6029
6030 /* je/jz +1 */
6031 pbCodeBuf[off++] = 0x74;
6032 pbCodeBuf[off++] = 0x01;
6033
6034 /* int3 */
6035 pbCodeBuf[off++] = 0xcc;
6036
6037# elif defined(RT_ARCH_ARM64)
6038 /* mov vectmp0, [gstreg] */
6039 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6040
6041 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6042 {
6043 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6044 /* eor vectmp0, vectmp0, idxSimdReg */
6045 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6046 /* cnt vectmp0, vectmp0, #0*/
6047 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6048 /* umov tmp0, vectmp0.D[0] */
6049 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6050 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6051 /* cbz tmp0, +1 */
6052 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6053 /* brk #0x1000+enmGstReg */
6054 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6055 }
6056
6057 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6058 {
6059 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6060 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6061 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6062 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6063 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6064 /* umov tmp0, (vectmp0 + 1).D[0] */
6065 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6066 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6067 /* cbz tmp0, +1 */
6068 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6069 /* brk #0x1000+enmGstReg */
6070 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6071 }
6072
6073# else
6074# error "Port me!"
6075# endif
6076
6077 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6078 return off;
6079}
6080# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6081
6082
6083/**
6084 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
6085 * important bits.
6086 *
6087 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6088 * Trashes EFLAGS on AMD64.
6089 */
6090DECL_HIDDEN_THROW(uint32_t)
6091iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6092{
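    /* Load fExec, mask out everything but the key bits and compare against the expected (masked) fExec
       value; bits outside IEMTB_F_KEY_MASK don't participate in this check. */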
6093 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6094 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6095 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6096 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6097
6098#ifdef RT_ARCH_AMD64
6099 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6100
6101 /* je/jz +1 */
6102 pbCodeBuf[off++] = 0x74;
6103 pbCodeBuf[off++] = 0x01;
6104
6105 /* int3 */
6106 pbCodeBuf[off++] = 0xcc;
6107
6108# elif defined(RT_ARCH_ARM64)
6109 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6110
6111 /* b.eq +1 */
6112 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6113 /* brk #0x2000 */
6114 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6115
6116# else
6117# error "Port me!"
6118# endif
6119 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6120
6121 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6122 return off;
6123}
6124
6125#endif /* VBOX_STRICT */
6126
6127/**
6128 * Emits code for checking the return code of a call and rcPassUp, returning
6129 * from the code if either is non-zero.
6130 */
6131DECL_HIDDEN_THROW(uint32_t)
6132iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6133{
6134#ifdef RT_ARCH_AMD64
6135 /*
6136 * AMD64: eax = call status code.
6137 */
6138
6139 /* edx = rcPassUp */
6140 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6141# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6142 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6143# endif
6144
6145 /* edx = eax | rcPassUp */
6146 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6147 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6148 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6149 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6150
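    /* edx is zero only if both the call status (eax) and rcPassUp are zero, so a single conditional branch covers both checks. */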
6151 /* Jump to non-zero status return path. */
6152 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6153
6154 /* done. */
6155
6156#elif RT_ARCH_ARM64
6157 /*
6158 * ARM64: w0 = call status code.
6159 */
6160# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6161 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6162# endif
6163 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6164
6165 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6166
6167 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6168
6169 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6170 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6171 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6172
6173#else
6174# error "port me"
6175#endif
6176 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6177 RT_NOREF_PV(idxInstr);
6178 return off;
6179}
6180
6181
6182/**
6183 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6184 * raising a \#GP(0) if it isn't.
6185 *
6186 * @returns New code buffer offset, UINT32_MAX on failure.
6187 * @param pReNative The native recompile state.
6188 * @param off The code buffer offset.
6189 * @param idxAddrReg The host register with the address to check.
6190 * @param idxInstr The current instruction.
6191 */
6192DECL_HIDDEN_THROW(uint32_t)
6193iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6194{
6195 /*
6196 * Make sure we don't have any outstanding guest register writes as we may
6197 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6198 */
6199 off = iemNativeRegFlushPendingWrites(pReNative, off);
6200
6201#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6202 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6203#else
6204 RT_NOREF(idxInstr);
6205#endif
6206
6207#ifdef RT_ARCH_AMD64
6208 /*
6209 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6210 * return raisexcpt();
6211 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6212 */
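    /* Example: 0xffff800000000000 (canonical):     high dword 0xffff8000, +0x8000 wraps to 0, >>16 gives 0 -> no exception.
       Example: 0x0000800000000000 (non-canonical): high dword 0x00008000, +0x8000 = 0x00010000, >>16 gives 1 -> #GP(0). */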
6213 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6214
6215 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6216 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6217 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6218 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6219 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6220
6221 iemNativeRegFreeTmp(pReNative, iTmpReg);
6222
6223#elif defined(RT_ARCH_ARM64)
6224 /*
6225 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6226 * return raisexcpt();
6227 * ----
6228 * mov x1, 0x800000000000
6229 * add x1, x0, x1
6230 * cmp xzr, x1, lsr 48
6231 * b.ne .Lraisexcpt
6232 */
6233 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6234
6235 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6236 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6237 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6238 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6239
6240 iemNativeRegFreeTmp(pReNative, iTmpReg);
6241
6242#else
6243# error "Port me"
6244#endif
6245 return off;
6246}
6247
6248
6249/**
6250 * Emits code to check that the content of @a idxAddrReg is within the limit
6251 * of CS, raising a \#GP(0) if it isn't.
6252 *
6253 * @returns New code buffer offset; throws VBox status code on error.
6254 * @param pReNative The native recompile state.
6255 * @param off The code buffer offset.
6256 * @param idxAddrReg The host register (32-bit) with the address to
6257 * check.
6258 * @param idxInstr The current instruction.
6259 */
6260DECL_HIDDEN_THROW(uint32_t)
6261iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6262 uint8_t idxAddrReg, uint8_t idxInstr)
6263{
6264 /*
6265 * Make sure we don't have any outstanding guest register writes as we may
6266 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6267 */
6268 off = iemNativeRegFlushPendingWrites(pReNative, off);
6269
6270#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6271 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6272#else
6273 RT_NOREF(idxInstr);
6274#endif
6275
6276 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6277 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6278 kIemNativeGstRegUse_ReadOnly);
6279
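    /* Unsigned compare against the CS limit; the limit is the last valid offset, so only addresses strictly above it take the RaiseGp0 path. */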
6280 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6281 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6282
6283 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6284 return off;
6285}
6286
6287
6288/**
6289 * Emits a call to a CImpl function or something similar.
6290 */
6291DECL_HIDDEN_THROW(uint32_t)
6292iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6293 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6294{
6295 /* Writeback everything. */
6296 off = iemNativeRegFlushPendingWrites(pReNative, off);
6297
6298 /*
6299 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6300 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6301 */
6302 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6303 fGstShwFlush
6304 | RT_BIT_64(kIemNativeGstReg_Pc)
6305 | RT_BIT_64(kIemNativeGstReg_EFlags));
6306 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6307
6308 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6309
6310 /*
6311 * Load the parameters.
6312 */
6313#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6314 /* Special handling for the hidden VBOXSTRICTRC return pointer. */
6315 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6316 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6317 if (cAddParams > 0)
6318 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6319 if (cAddParams > 1)
6320 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6321 if (cAddParams > 2)
6322 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6323 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
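    /* Note: the hidden VBOXSTRICTRC return buffer occupies the first argument register (rcx), so the
       explicit arguments are all shifted up one slot and the last two go onto the stack. */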
6324
6325#else
6326 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6327 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6328 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6329 if (cAddParams > 0)
6330 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6331 if (cAddParams > 1)
6332 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6333 if (cAddParams > 2)
6334# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6335 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6336# else
6337 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6338# endif
6339#endif
6340
6341 /*
6342 * Make the call.
6343 */
6344 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6345
6346#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6347 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6348#endif
6349
6350 /*
6351 * Check the status code.
6352 */
6353 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6354}
6355
6356
6357/**
6358 * Emits a call to a threaded worker function.
6359 */
6360DECL_HIDDEN_THROW(uint32_t)
6361iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6362{
6363 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6364 off = iemNativeRegFlushPendingWrites(pReNative, off);
6365
6366 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6367 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6368
6369#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6370 /* The threaded function may throw / long jmp, so set current instruction
6371 number if we're counting. */
6372 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6373#endif
6374
6375 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6376
6377#ifdef RT_ARCH_AMD64
6378 /* Load the parameters and emit the call. */
6379# ifdef RT_OS_WINDOWS
6380# ifndef VBOXSTRICTRC_STRICT_ENABLED
6381 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6382 if (cParams > 0)
6383 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6384 if (cParams > 1)
6385 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6386 if (cParams > 2)
6387 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6388# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6389 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6390 if (cParams > 0)
6391 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6392 if (cParams > 1)
6393 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6394 if (cParams > 2)
6395 {
6396 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6397 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6398 }
6399 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6400# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6401# else
6402 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6403 if (cParams > 0)
6404 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6405 if (cParams > 1)
6406 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6407 if (cParams > 2)
6408 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6409# endif
6410
6411 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6412
6413# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6414 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6415# endif
6416
6417#elif RT_ARCH_ARM64
6418 /*
6419 * ARM64:
6420 */
6421 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6422 if (cParams > 0)
6423 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6424 if (cParams > 1)
6425 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6426 if (cParams > 2)
6427 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6428
6429 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6430
6431#else
6432# error "port me"
6433#endif
6434
6435 /*
6436 * Check the status code.
6437 */
6438 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6439
6440 return off;
6441}
6442
6443#ifdef VBOX_WITH_STATISTICS
6444/**
6445 * Emits code to update the thread call statistics.
6446 */
6447DECL_INLINE_THROW(uint32_t)
6448iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6449{
6450 /*
6451 * Update threaded function stats.
6452 */
6453 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6454 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6455# if defined(RT_ARCH_ARM64)
6456 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6457 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6458 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6459 iemNativeRegFreeTmp(pReNative, idxTmp1);
6460 iemNativeRegFreeTmp(pReNative, idxTmp2);
6461# else
6462 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6463# endif
6464 return off;
6465}
6466#endif /* VBOX_WITH_STATISTICS */
6467
6468
6469/**
6470 * Emits the code at the CheckBranchMiss label.
6471 */
6472static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6473{
6474 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6475 if (idxLabel != UINT32_MAX)
6476 {
6477 iemNativeLabelDefine(pReNative, idxLabel, off);
6478
6479 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6480 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6481 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6482
6483 /* jump back to the return sequence. */
6484 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6485 }
6486 return off;
6487}
6488
6489
6490/**
6491 * Emits the code at the NeedCsLimChecking label.
6492 */
6493static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6494{
6495 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6496 if (idxLabel != UINT32_MAX)
6497 {
6498 iemNativeLabelDefine(pReNative, idxLabel, off);
6499
6500 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6501 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6502 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6503
6504 /* jump back to the return sequence. */
6505 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6506 }
6507 return off;
6508}
6509
6510
6511/**
6512 * Emits the code at the ObsoleteTb label.
6513 */
6514static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6515{
6516 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6517 if (idxLabel != UINT32_MAX)
6518 {
6519 iemNativeLabelDefine(pReNative, idxLabel, off);
6520
6521 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6522 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6523 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6524
6525 /* jump back to the return sequence. */
6526 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6527 }
6528 return off;
6529}
6530
6531
6532/**
6533 * Emits the code at the RaiseGP0 label.
6534 */
6535static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6536{
6537 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
6538 if (idxLabel != UINT32_MAX)
6539 {
6540 iemNativeLabelDefine(pReNative, idxLabel, off);
6541
6542 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
6543 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6544 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
6545
6546 /* jump back to the return sequence. */
6547 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6548 }
6549 return off;
6550}
6551
6552
6553/**
6554 * Emits the code at the RaiseNm label.
6555 */
6556static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6557{
6558 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
6559 if (idxLabel != UINT32_MAX)
6560 {
6561 iemNativeLabelDefine(pReNative, idxLabel, off);
6562
6563 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
6564 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6565 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
6566
6567 /* jump back to the return sequence. */
6568 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6569 }
6570 return off;
6571}
6572
6573
6574/**
6575 * Emits the code at the RaiseUd label.
6576 */
6577static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6578{
6579 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
6580 if (idxLabel != UINT32_MAX)
6581 {
6582 iemNativeLabelDefine(pReNative, idxLabel, off);
6583
6584 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
6585 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6586 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
6587
6588 /* jump back to the return sequence. */
6589 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6590 }
6591 return off;
6592}
6593
6594
6595/**
6596 * Emits the code at the RaiseMf label.
6597 */
6598static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6599{
6600 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
6601 if (idxLabel != UINT32_MAX)
6602 {
6603 iemNativeLabelDefine(pReNative, idxLabel, off);
6604
6605 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
6606 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6607 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
6608
6609 /* jump back to the return sequence. */
6610 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6611 }
6612 return off;
6613}
6614
6615
6616/**
6617 * Emits the code at the RaiseXf label.
6618 */
6619static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6620{
6621 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
6622 if (idxLabel != UINT32_MAX)
6623 {
6624 iemNativeLabelDefine(pReNative, idxLabel, off);
6625
6626 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
6627 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6628 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
6629
6630 /* jump back to the return sequence. */
6631 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6632 }
6633 return off;
6634}
6635
6636
6637/**
6638 * Emits the code at the ReturnWithFlags label (returns
6639 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6640 */
6641static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6642{
6643 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6644 if (idxLabel != UINT32_MAX)
6645 {
6646 iemNativeLabelDefine(pReNative, idxLabel, off);
6647
6648 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6649
6650 /* jump back to the return sequence. */
6651 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6652 }
6653 return off;
6654}
6655
6656
6657/**
6658 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6659 */
6660static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6661{
6662 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6663 if (idxLabel != UINT32_MAX)
6664 {
6665 iemNativeLabelDefine(pReNative, idxLabel, off);
6666
6667 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6668
6669 /* jump back to the return sequence. */
6670 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6671 }
6672 return off;
6673}
6674
6675
6676/**
6677 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6678 */
6679static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6680{
6681 /*
6682 * Generate the rc + rcPassUp fiddling code if needed.
6683 */
6684 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6685 if (idxLabel != UINT32_MAX)
6686 {
6687 iemNativeLabelDefine(pReNative, idxLabel, off);
6688
6689 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6690#ifdef RT_ARCH_AMD64
6691# ifdef RT_OS_WINDOWS
6692# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6693 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6694# endif
6695 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6696 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6697# else
6698 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6699 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6700# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6701 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6702# endif
6703# endif
6704# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6705 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6706# endif
6707
6708#else
6709 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6710 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6711 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6712#endif
6713
6714 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6715 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6716 }
6717 return off;
6718}
6719
6720
6721/**
6722 * Emits a standard epilog.
6723 */
6724static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6725{
6726 *pidxReturnLabel = UINT32_MAX;
6727
6728 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6729 off = iemNativeRegFlushPendingWrites(pReNative, off);
6730
6731 /*
6732 * Successful return, so clear the return register (eax, w0).
6733 */
6734 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6735
6736 /*
6737 * Define label for common return point.
6738 */
6739 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6740 *pidxReturnLabel = idxReturn;
6741
6742 /*
6743 * Restore registers and return.
6744 */
6745#ifdef RT_ARCH_AMD64
6746 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6747
6748 /* Reposition rsp at the r15 restore point. */
6749 pbCodeBuf[off++] = X86_OP_REX_W;
6750 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6751 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6752 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6753
6754 /* Pop non-volatile registers and return */
6755 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6756 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6757 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6758 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6759 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6760 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6761 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6762 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6763# ifdef RT_OS_WINDOWS
6764 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6765 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6766# endif
6767 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6768 pbCodeBuf[off++] = 0xc9; /* leave */
6769 pbCodeBuf[off++] = 0xc3; /* ret */
6770 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6771
6772#elif RT_ARCH_ARM64
6773 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6774
6775 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6776 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6777 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6778 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6779 IEMNATIVE_FRAME_VAR_SIZE / 8);
6780 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6781 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6782 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6783 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6784 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6785 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6786 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6787 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6788 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6789 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6790 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6791 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6792
6793 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6794 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6795 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6796 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6797
6798 /* retab / ret */
6799# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6800 if (1)
6801 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6802 else
6803# endif
6804 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6805
6806#else
6807# error "port me"
6808#endif
6809 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6810
6811 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6812}
6813
6814
6815/**
6816 * Emits a standard prolog.
6817 */
6818static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6819{
6820#ifdef RT_ARCH_AMD64
6821 /*
6822 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6823 * reserving 64 bytes for stack variables plus 4 non-register argument
6824 * slots. Fixed register assignment: xBX = pVCpu;
6825 *
6826 * Since we always do the same register spilling, we can use the same
6827 * unwind description for all the code.
6828 */
6829 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6830 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6831 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6832 pbCodeBuf[off++] = 0x8b;
6833 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6834 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6835 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6836# ifdef RT_OS_WINDOWS
6837 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6838 pbCodeBuf[off++] = 0x8b;
6839 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6840 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6841 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6842# else
6843 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6844 pbCodeBuf[off++] = 0x8b;
6845 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6846# endif
6847 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6848 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6849 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6850 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6851 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6852 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6853 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6854 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6855
6856# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6857 /* Save the frame pointer. */
6858 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6859# endif
6860
6861 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6862 X86_GREG_xSP,
6863 IEMNATIVE_FRAME_ALIGN_SIZE
6864 + IEMNATIVE_FRAME_VAR_SIZE
6865 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6866 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6867 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6868 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6869 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6870
6871#elif RT_ARCH_ARM64
6872 /*
6873 * We set up a stack frame exactly like on x86, only we have to push the
6874 * return address ourselves here. We save all non-volatile registers.
6875 */
6876 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6877
6878# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
6879 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6880 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
6881 * in any way conditional, so we just emit this instruction now and hope for the best... */
6882 /* pacibsp */
6883 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6884# endif
6885
6886 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6887 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6888 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6889 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6890 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6891 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6892 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6893 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6894 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6895 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6896 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6897 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6898 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6899 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6900 /* Save the BP and LR (ret address) registers at the top of the frame. */
6901 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6902 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6903 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6904 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6905 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6906 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6907
6908 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6909 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6910
6911 /* mov r28, r0 */
6912 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6913 /* mov r27, r1 */
6914 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6915
6916# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6917 /* Save the frame pointer. */
6918 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6919 ARMV8_A64_REG_X2);
6920# endif
6921
6922#else
6923# error "port me"
6924#endif
6925 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6926 return off;
6927}
6928
6929
6930/*********************************************************************************************************************************
6931* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6932*********************************************************************************************************************************/
6933
6934/**
6935 * Internal work that allocates a variable with kind set to
6936 * kIemNativeVarKind_Invalid and no current stack allocation.
6937 *
6938 * The kind will either be set by the caller or later when the variable is first
6939 * assigned a value.
6940 *
6941 * @returns Unpacked index.
6942 * @internal
6943 */
6944static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6945{
6946 Assert(cbType > 0 && cbType <= 64);
6947 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6948 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6949 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6950 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6951 pReNative->Core.aVars[idxVar].cbVar = cbType;
6952 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6953 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6954 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6955 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6956 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6957 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6958 pReNative->Core.aVars[idxVar].u.uValue = 0;
6959 return idxVar;
6960}
6961
6962
6963/**
6964 * Internal work that allocates an argument variable w/o setting enmKind.
6965 *
6966 * @returns Unpacked index.
6967 * @internal
6968 */
6969static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6970{
6971 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6972 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6973 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6974
6975 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6976 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6977 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6978 return idxVar;
6979}
6980
6981
6982/**
6983 * Gets the stack slot for a stack variable, allocating one if necessary.
6984 *
6985 * Calling this function implies that the stack slot will contain a valid
6986 * variable value. The caller deals with any register currently assigned to the
6987 * variable, typically by spilling it into the stack slot.
6988 *
6989 * @returns The stack slot number.
6990 * @param pReNative The recompiler state.
6991 * @param idxVar The variable.
6992 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6993 */
6994DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6995{
6996 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6997 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6998 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6999
7000 /* Already got a slot? */
7001 uint8_t const idxStackSlot = pVar->idxStackSlot;
7002 if (idxStackSlot != UINT8_MAX)
7003 {
7004 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7005 return idxStackSlot;
7006 }
7007
7008 /*
7009 * A single slot is easy to allocate.
7010 * Allocate them from the top end, closest to BP, to reduce the displacement.
7011 */
7012 if (pVar->cbVar <= sizeof(uint64_t))
7013 {
7014 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7015 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7016 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7017 pVar->idxStackSlot = (uint8_t)iSlot;
7018        Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7019 return (uint8_t)iSlot;
7020 }
7021
7022 /*
7023 * We need more than one stack slot.
7024 *
7025     * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7; (see the sketch after this function)
7026 */
7027 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7028 Assert(pVar->cbVar <= 64);
7029 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7030 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7031 uint32_t bmStack = ~pReNative->Core.bmStack;
7032 while (bmStack != UINT32_MAX)
7033 {
7034/** @todo allocate from the top to reduce BP displacement. */
7035 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
7036 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7037 if (!(iSlot & fBitAlignMask))
7038 {
7039 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
7040 {
7041 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7042 pVar->idxStackSlot = (uint8_t)iSlot;
7043                Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7044 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7045 return (uint8_t)iSlot;
7046 }
7047 }
7048 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
7049 }
7050 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7051}
7052
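/* Illustrative sketch (not compiled, not part of the recompiler): how the multi-slot
   masks in iemNativeVarGetStackSlot above are derived for a given cbVar. The helper
   name is made up for this example only. */
#if 0
static void iemNativeExampleStackSlotMasks(uint8_t cbVar)
{
    /* Slots are 8 bytes each: a 16-byte variable needs 2 slots aligned on 2 slots,
       a 32-byte one 4 slots aligned on 4, and a 64-byte one 8 slots aligned on 8. */
    uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(cbVar) - 4) - 1; /* 16 -> 1; 32 -> 3; 64 -> 7 */
    uint32_t const fBitAllocMask = RT_BIT_32((cbVar + 7) >> 3) - 1;            /* 16 -> 0x3; 32 -> 0xf; 64 -> 0xff */
    Log11(("cbVar=%#x -> fBitAlignMask=%#x fBitAllocMask=%#x\n", cbVar, fBitAlignMask, fBitAllocMask));
}
#endif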
7053
7054/**
7055 * Changes the variable to a stack variable.
7056 *
7057 * Currently this is only possible to do the first time the variable is used;
7058 * switching later can be implemented but isn't done.
7059 *
7060 * @param pReNative The recompiler state.
7061 * @param idxVar The variable.
7062 * @throws VERR_IEM_VAR_IPE_2
7063 */
7064DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7065{
7066 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7067 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7068 if (pVar->enmKind != kIemNativeVarKind_Stack)
7069 {
7070 /* We could in theory transition from immediate to stack as well, but it
7071 would involve the caller doing work storing the value on the stack. So,
7072 till that's required we only allow transition from invalid. */
7073 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7074 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7075 pVar->enmKind = kIemNativeVarKind_Stack;
7076
7077 /* Note! We don't allocate a stack slot here, that's only done when a
7078 slot is actually needed to hold a variable value. */
7079 }
7080}
7081
7082
7083/**
7084 * Sets the variable to a constant value.
7085 *
7086 * This does not require stack storage as we know the value and can always
7087 * reload it, unless of course it's referenced.
7088 *
7089 * @param pReNative The recompiler state.
7090 * @param idxVar The variable.
7091 * @param uValue The immediate value.
7092 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7093 */
7094DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7095{
7096 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7097 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7098 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7099 {
7100 /* Only simple transitions for now. */
7101 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7102 pVar->enmKind = kIemNativeVarKind_Immediate;
7103 }
7104 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7105
7106 pVar->u.uValue = uValue;
7107 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7108 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7109 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7110}
7111
7112
7113/**
7114 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7115 *
7116 * This does not require stack storage as we know the value and can always
7117 * reload it. Loading is postponed till needed.
7118 *
7119 * @param pReNative The recompiler state.
7120 * @param idxVar The variable. Unpacked.
7121 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7122 *
7123 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7124 * @internal
7125 */
7126static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7127{
7128 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7129 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7130
7131 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7132 {
7133 /* Only simple transitions for now. */
7134 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7135 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7136 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7137 }
7138 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7139
7140 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7141
7142 /* Update the other variable, ensure it's a stack variable. */
7143 /** @todo handle variables with const values... that'll go boom now. */
7144 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7145 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7146}
7147
7148
7149/**
7150 * Sets the variable to a reference (pointer) to a guest register reference.
7151 *
7152 * This does not require stack storage as we know the value and can always
7153 * reload it. Loading is postponed till needed.
7154 *
7155 * @param pReNative The recompiler state.
7156 * @param idxVar The variable.
7157 * @param   enmRegClass     The class of guest registers to reference.
7158 * @param idxReg The register within @a enmRegClass to reference.
7159 *
7160 * @throws VERR_IEM_VAR_IPE_2
7161 */
7162DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7163 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7164{
7165 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7166 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7167
7168 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7169 {
7170 /* Only simple transitions for now. */
7171 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7172 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7173 }
7174 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7175
7176 pVar->u.GstRegRef.enmClass = enmRegClass;
7177 pVar->u.GstRegRef.idx = idxReg;
7178}
7179
7180
7181DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7182{
7183 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7184}
7185
7186
7187DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7188{
7189 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7190
7191    /* Since we're using a generic uint64_t value type, we must truncate it if
7192       the variable is smaller, otherwise we may end up with a too large value
7193       when scaling up an imm8 w/ sign-extension.
7194
7195       This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7196       in the bios, bx=1) when running on arm, because clang expects 16-bit
7197       register parameters to have bits 16 and up set to zero. Instead of
7198       setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7199       CF value in the result. (See the illustrative sketch after this function.) */
7200 switch (cbType)
7201 {
7202 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7203 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7204 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7205 }
7206 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7207 return idxVar;
7208}
7209
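/* Illustrative sketch (not compiled): the truncation above for the "add bx, 0xffff"
   case described in the comment. The sign-extended imm8 becomes a uint64_t of all
   ones; masking it down to the 16-bit variable size recovers the 0xffff the helper
   expects. Variable names are made up for this example only. */
#if 0
{
    uint64_t      uValue = (uint64_t)(int64_t)(int8_t)0xff; /* 0xffffffffffffffff after sign-extension */
    uint8_t const cbType = sizeof(uint16_t);
    if (cbType == sizeof(uint16_t))
        uValue &= UINT64_C(0xffff);                         /* now 0xffff, i.e. bits 16 and up are zero */
}
#endif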
7210
7211DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7212{
7213 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7214 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7215 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7216 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7217 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7218 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7219
7220 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7221 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7222 return idxArgVar;
7223}
7224
7225
7226DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7227{
7228 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7229 /* Don't set to stack now, leave that to the first use as for instance
7230 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7231 return idxVar;
7232}
7233
7234
7235DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7236{
7237 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7238
7239    /* Since we're using a generic uint64_t value type, we must truncate it if
7240       the variable is smaller, otherwise we may end up with a too large value
7241       when scaling up an imm8 w/ sign-extension. */
7242 switch (cbType)
7243 {
7244 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7245 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7246 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7247 }
7248 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7249 return idxVar;
7250}
7251
7252
7253/**
7254 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7255 * fixed till we call iemNativeVarRegisterRelease (see the usage sketch after this function).
7256 *
7257 * @returns The host register number.
7258 * @param pReNative The recompiler state.
7259 * @param idxVar The variable.
7260 * @param poff Pointer to the instruction buffer offset.
7261 * In case a register needs to be freed up or the value
7262 * loaded off the stack.
7263 * @param fInitialized Set if the variable must already have been initialized.
7264 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7265 * the case.
7266 * @param idxRegPref Preferred register number or UINT8_MAX.
7267 */
7268DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7269 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7270{
7271 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7272 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7273 Assert(pVar->cbVar <= 8);
7274 Assert(!pVar->fRegAcquired);
7275
7276 uint8_t idxReg = pVar->idxReg;
7277 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7278 {
7279 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7280 && pVar->enmKind < kIemNativeVarKind_End);
7281 pVar->fRegAcquired = true;
7282 return idxReg;
7283 }
7284
7285 /*
7286 * If the kind of variable has not yet been set, default to 'stack'.
7287 */
7288 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7289 && pVar->enmKind < kIemNativeVarKind_End);
7290 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7291 iemNativeVarSetKindToStack(pReNative, idxVar);
7292
7293 /*
7294     * We have to allocate a register for the variable, even if it's a stack one,
7295     * as we don't know if there are modifications being made to it before it's
7296     * finalized (todo: analyze and insert hints about that?).
7297     *
7298     * If we can, we try to get the correct register for argument variables. This
7299     * assumes that most argument variables are fetched as close as possible
7300     * to the actual call, so that there aren't any interfering hidden calls
7301     * (memory accesses, etc.) in between.
7302     *
7303     * If we cannot, or it's a local (non-argument) variable, we make sure no
7304     * argument registers that will be used by this MC block are allocated here,
7305     * and we always prefer non-volatile registers to avoid needing to spill
7306     * stuff for internal calls.
7307 */
7308    /** @todo Have the python script detect too early argument value fetches and
7309     *        warn about hidden calls causing less optimal code to be generated. */
7310
7311 uint8_t const uArgNo = pVar->uArgNo;
7312 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7313 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7314 {
7315 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7316 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7317 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7318 }
7319 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7320 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7321 {
7322 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7323 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7324 & ~pReNative->Core.bmHstRegsWithGstShadow
7325 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7326 & fNotArgsMask;
7327 if (fRegs)
7328 {
7329 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7330 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7331 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7332 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7333 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7334 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7335 }
7336 else
7337 {
7338 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7339 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7340 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7341 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7342 }
7343 }
7344 else
7345 {
7346 idxReg = idxRegPref;
7347 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7348 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7349 }
7350 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7351 pVar->idxReg = idxReg;
7352
7353 /*
7354 * Load it off the stack if we've got a stack slot.
7355 */
7356 uint8_t const idxStackSlot = pVar->idxStackSlot;
7357 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7358 {
7359 Assert(fInitialized);
7360 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7361 switch (pVar->cbVar)
7362 {
7363 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7364 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7365 case 3: AssertFailed(); RT_FALL_THRU();
7366 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7367 default: AssertFailed(); RT_FALL_THRU();
7368 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7369 }
7370 }
7371 else
7372 {
7373 Assert(idxStackSlot == UINT8_MAX);
7374 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7375 }
7376 pVar->fRegAcquired = true;
7377 return idxReg;
7378}
7379
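/* Illustrative usage sketch (not compiled): the typical acquire/use/release pattern
   for getting a variable into a host register. The middle step is a placeholder, and
   the exact signature of iemNativeVarRegisterRelease is assumed from the doc comment
   above. */
#if 0
{
    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
    /* ... emit code that reads and/or writes idxVarReg here ... */
    iemNativeVarRegisterRelease(pReNative, idxVar);
}
#endif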
7380
7381/**
7382 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7383 * guest register.
7384 *
7385 * This function makes sure there is a register for it and sets it to be the
7386 * current shadow copy of @a enmGstReg.
7387 *
7388 * @returns The host register number.
7389 * @param pReNative The recompiler state.
7390 * @param idxVar The variable.
7391 * @param enmGstReg The guest register this variable will be written to
7392 * after this call.
7393 * @param poff Pointer to the instruction buffer offset.
7394 * In case a register needs to be freed up or if the
7395 * variable content needs to be loaded off the stack.
7396 *
7397 * @note    We DO NOT expect @a idxVar to be an argument variable, because
7398 *          this function is only used in the commit stage of an instruction,
7399 *          when the variable's value is written to the guest register.
7400 */
7401DECL_HIDDEN_THROW(uint8_t)
7402iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7403{
7404 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7405 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7406 Assert(!pVar->fRegAcquired);
7407 AssertMsgStmt( pVar->cbVar <= 8
7408 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7409 || pVar->enmKind == kIemNativeVarKind_Stack),
7410 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7411 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7412 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7413
7414 /*
7415 * This shouldn't ever be used for arguments, unless it's in a weird else
7416 * branch that doesn't do any calling and even then it's questionable.
7417 *
7418 * However, in case someone writes crazy wrong MC code and does register
7419 * updates before making calls, just use the regular register allocator to
7420 * ensure we get a register suitable for the intended argument number.
7421 */
7422 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7423
7424 /*
7425 * If there is already a register for the variable, we transfer/set the
7426 * guest shadow copy assignment to it.
7427 */
7428 uint8_t idxReg = pVar->idxReg;
7429 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7430 {
7431 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7432 {
7433 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7434 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7435 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7436 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7437 }
7438 else
7439 {
7440 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7441 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7442 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7443 }
7444 /** @todo figure this one out. We need some way of making sure the register isn't
7445 * modified after this point, just in case we start writing crappy MC code. */
7446 pVar->enmGstReg = enmGstReg;
7447 pVar->fRegAcquired = true;
7448 return idxReg;
7449 }
7450 Assert(pVar->uArgNo == UINT8_MAX);
7451
7452 /*
7453     * Because this is supposed to be the commit stage, we just tag along with the
7454 * temporary register allocator and upgrade it to a variable register.
7455 */
7456 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7457 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7458 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7459 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7460 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7461 pVar->idxReg = idxReg;
7462
7463 /*
7464 * Now we need to load the register value.
7465 */
7466 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7467 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7468 else
7469 {
7470 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7471 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7472 switch (pVar->cbVar)
7473 {
7474 case sizeof(uint64_t):
7475 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7476 break;
7477 case sizeof(uint32_t):
7478 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7479 break;
7480 case sizeof(uint16_t):
7481 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7482 break;
7483 case sizeof(uint8_t):
7484 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7485 break;
7486 default:
7487 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7488 }
7489 }
7490
7491 pVar->fRegAcquired = true;
7492 return idxReg;
7493}
7494
7495
7496/**
7497 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7498 *
7499 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7500 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7501 * requirement of flushing anything in volatile host registers when making a
7502 * call.
7503 *
7504 * @returns New @a off value.
7505 * @param pReNative The recompiler state.
7506 * @param off The code buffer position.
7507 * @param fHstRegsNotToSave Set of registers not to save & restore.
7508 */
7509DECL_HIDDEN_THROW(uint32_t)
7510iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7511{
7512 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7513 if (fHstRegs)
7514 {
7515 do
7516 {
7517 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7518 fHstRegs &= ~RT_BIT_32(idxHstReg);
7519
7520 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7521 {
7522 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7523 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7524 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7525 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7526 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7527 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7528 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7529 {
7530 case kIemNativeVarKind_Stack:
7531 {
7532 /* Temporarily spill the variable register. */
7533 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7534 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7535 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7536 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7537 continue;
7538 }
7539
7540 case kIemNativeVarKind_Immediate:
7541 case kIemNativeVarKind_VarRef:
7542 case kIemNativeVarKind_GstRegRef:
7543 /* It is weird to have any of these loaded at this point. */
7544 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7545 continue;
7546
7547 case kIemNativeVarKind_End:
7548 case kIemNativeVarKind_Invalid:
7549 break;
7550 }
7551 AssertFailed();
7552 }
7553 else
7554 {
7555 /*
7556 * Allocate a temporary stack slot and spill the register to it.
7557 */
7558 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7559 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7560 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7561 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7562 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7563 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7564 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7565 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7566 }
7567 } while (fHstRegs);
7568 }
7569 return off;
7570}
7571
7572
7573/**
7574 * Emit code to restore volatile registers after a call to a helper; see the usage sketch after this function.
7575 *
7576 * @returns New @a off value.
7577 * @param pReNative The recompiler state.
7578 * @param off The code buffer position.
7579 * @param fHstRegsNotToSave Set of registers not to save & restore.
7580 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7581 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7582 */
7583DECL_HIDDEN_THROW(uint32_t)
7584iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7585{
7586 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7587 if (fHstRegs)
7588 {
7589 do
7590 {
7591 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7592 fHstRegs &= ~RT_BIT_32(idxHstReg);
7593
7594 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7595 {
7596 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7597 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7598 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7599 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7600 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7601 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7602 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7603 {
7604 case kIemNativeVarKind_Stack:
7605 {
7606 /* Unspill the variable register. */
7607 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7608 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7609 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7610 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7611 continue;
7612 }
7613
7614 case kIemNativeVarKind_Immediate:
7615 case kIemNativeVarKind_VarRef:
7616 case kIemNativeVarKind_GstRegRef:
7617 /* It is weird to have any of these loaded at this point. */
7618 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7619 continue;
7620
7621 case kIemNativeVarKind_End:
7622 case kIemNativeVarKind_Invalid:
7623 break;
7624 }
7625 AssertFailed();
7626 }
7627 else
7628 {
7629 /*
7630 * Restore from temporary stack slot.
7631 */
7632 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7633 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7634 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7635 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7636
7637 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7638 }
7639 } while (fHstRegs);
7640 }
7641 return off;
7642}
7643
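/* Illustrative usage sketch (not compiled): how the save/restore pair above brackets
   a helper call (e.g. on a TLB miss), as described in the doc comments. The middle
   step is a placeholder rather than an actual API. */
#if 0
{
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);
    /* ... load the helper arguments and emit the call here ... */
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);
}
#endif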
7644
7645/**
7646 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
7647 *
7648 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7649 *
7650 * ASSUMES that @a idxVar is valid and unpacked.
7651 */
7652DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7653{
7654 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7655 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7656 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7657 {
7658 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7659 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7660 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7661 Assert(cSlots > 0);
7662 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7663 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7664 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7665 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7666 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7667 }
7668 else
7669 Assert(idxStackSlot == UINT8_MAX);
7670}
7671
7672
7673/**
7674 * Worker that frees a single variable.
7675 *
7676 * ASSUMES that @a idxVar is valid and unpacked.
7677 */
7678DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7679{
7680 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7681 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7682 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7683
7684 /* Free the host register first if any assigned. */
7685 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7686 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7687 {
7688 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7689 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7690 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7691 }
7692
7693 /* Free argument mapping. */
7694 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7695 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7696 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7697
7698 /* Free the stack slots. */
7699 iemNativeVarFreeStackSlots(pReNative, idxVar);
7700
7701 /* Free the actual variable. */
7702 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7703 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7704}
7705
7706
7707/**
7708 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7709 */
7710DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7711{
7712 while (bmVars != 0)
7713 {
7714 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7715 bmVars &= ~RT_BIT_32(idxVar);
7716
7717#if 1 /** @todo optimize by simplifying this later... */
7718 iemNativeVarFreeOneWorker(pReNative, idxVar);
7719#else
7720 /* Only need to free the host register, the rest is done as bulk updates below. */
7721 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7722 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7723 {
7724 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7725 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7726 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7727 }
7728#endif
7729 }
7730#if 0 /** @todo optimize by simplifying this later... */
7731 pReNative->Core.bmVars = 0;
7732 pReNative->Core.bmStack = 0;
7733 pReNative->Core.u64ArgVars = UINT64_MAX;
7734#endif
7735}
7736
7737
7738
7739/*********************************************************************************************************************************
7740* Emitters for IEM_MC_CALL_CIMPL_XXX *
7741*********************************************************************************************************************************/
7742
7743/**
7744 * Emits code to load a reference to the given guest register into @a idxGprDst.
7745 */
7746DECL_INLINE_THROW(uint32_t)
7747iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7748 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7749{
7750#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7751    /** @todo If we're ever going to allow referencing the RIP register we need to update the guest value here. */
7752#endif
7753
7754 /*
7755 * Get the offset relative to the CPUMCTX structure.
7756 */
7757 uint32_t offCpumCtx;
7758 switch (enmClass)
7759 {
7760 case kIemNativeGstRegRef_Gpr:
7761 Assert(idxRegInClass < 16);
7762 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7763 break;
7764
7765 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7766 Assert(idxRegInClass < 4);
7767 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7768 break;
7769
7770 case kIemNativeGstRegRef_EFlags:
7771 Assert(idxRegInClass == 0);
7772 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7773 break;
7774
7775 case kIemNativeGstRegRef_MxCsr:
7776 Assert(idxRegInClass == 0);
7777 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7778 break;
7779
7780 case kIemNativeGstRegRef_FpuReg:
7781 Assert(idxRegInClass < 8);
7782 AssertFailed(); /** @todo what kind of indexing? */
7783 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7784 break;
7785
7786 case kIemNativeGstRegRef_MReg:
7787 Assert(idxRegInClass < 8);
7788 AssertFailed(); /** @todo what kind of indexing? */
7789 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7790 break;
7791
7792 case kIemNativeGstRegRef_XReg:
7793 Assert(idxRegInClass < 16);
7794 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7795 break;
7796
7797 default:
7798 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7799 }
7800
7801 /*
7802 * Load the value into the destination register.
7803 */
7804#ifdef RT_ARCH_AMD64
7805 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7806
7807#elif defined(RT_ARCH_ARM64)
7808 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7809 Assert(offCpumCtx < 4096);
7810 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7811
7812#else
7813# error "Port me!"
7814#endif
7815
7816 return off;
7817}
7818
7819
7820/**
7821 * Common code for CIMPL and AIMPL calls.
7822 *
7823 * These are calls that use argument variables and such. They should not be
7824 * confused with internal calls required to implement an MC operation,
7825 * like a TLB load and similar.
7826 *
7827 * Upon return all that is left to do is to load any hidden arguments and
7828 * perform the call. All argument variables are freed. (See the caller sketch after this function.)
7829 *
7830 * @returns New code buffer offset; throws VBox status code on error.
7831 * @param pReNative The native recompile state.
7832 * @param off The code buffer offset.
7833 * @param   cArgs           The total number of arguments (includes hidden
7834 * count).
7835 * @param cHiddenArgs The number of hidden arguments. The hidden
7836 * arguments must not have any variable declared for
7837 * them, whereas all the regular arguments must
7838 * (tstIEMCheckMc ensures this).
7839 */
7840DECL_HIDDEN_THROW(uint32_t)
7841iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7842{
7843#ifdef VBOX_STRICT
7844 /*
7845 * Assert sanity.
7846 */
7847 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7848 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7849 for (unsigned i = 0; i < cHiddenArgs; i++)
7850 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7851 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7852 {
7853 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7854 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7855 }
7856 iemNativeRegAssertSanity(pReNative);
7857#endif
7858
7859 /* We don't know what the called function makes use of, so flush any pending register writes. */
7860 off = iemNativeRegFlushPendingWrites(pReNative, off);
7861
7862 /*
7863 * Before we do anything else, go over variables that are referenced and
7864 * make sure they are not in a register.
7865 */
7866 uint32_t bmVars = pReNative->Core.bmVars;
7867 if (bmVars)
7868 {
7869 do
7870 {
7871 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7872 bmVars &= ~RT_BIT_32(idxVar);
7873
7874 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7875 {
7876 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7877 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7878 {
7879 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7880 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7881 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7882 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7883 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7884
7885 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7886 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7887 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7888 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7889 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7890 }
7891 }
7892 } while (bmVars != 0);
7893#if 0 //def VBOX_STRICT
7894 iemNativeRegAssertSanity(pReNative);
7895#endif
7896 }
7897
7898 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7899
7900 /*
7901 * First, go over the host registers that will be used for arguments and make
7902 * sure they either hold the desired argument or are free.
7903 */
7904 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7905 {
7906 for (uint32_t i = 0; i < cRegArgs; i++)
7907 {
7908 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7909 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7910 {
7911 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7912 {
7913 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7914 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7915 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7916 Assert(pVar->idxReg == idxArgReg);
7917 uint8_t const uArgNo = pVar->uArgNo;
7918 if (uArgNo == i)
7919                    { /* perfect */ }
7920 /* The variable allocator logic should make sure this is impossible,
7921 except for when the return register is used as a parameter (ARM,
7922 but not x86). */
7923#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7924 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7925 {
7926# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7927# error "Implement this"
7928# endif
7929 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7930 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7931 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7932 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7933 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7934 }
7935#endif
7936 else
7937 {
7938 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7939
7940 if (pVar->enmKind == kIemNativeVarKind_Stack)
7941 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7942 else
7943 {
7944 /* just free it, can be reloaded if used again */
7945 pVar->idxReg = UINT8_MAX;
7946 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7947 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7948 }
7949 }
7950 }
7951 else
7952 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7953 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7954 }
7955 }
7956#if 0 //def VBOX_STRICT
7957 iemNativeRegAssertSanity(pReNative);
7958#endif
7959 }
7960
7961 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7962
7963#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7964 /*
7965 * If there are any stack arguments, make sure they are in their place as well.
7966 *
7967     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7968     * the caller) will be loading it later and it must be free (see the first loop).
7969 */
7970 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7971 {
7972 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7973 {
7974 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7975 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7976 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7977 {
7978 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7979 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7980 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7981 pVar->idxReg = UINT8_MAX;
7982 }
7983 else
7984 {
7985 /* Use ARG0 as temp for stuff we need registers for. */
7986 switch (pVar->enmKind)
7987 {
7988 case kIemNativeVarKind_Stack:
7989 {
7990 uint8_t const idxStackSlot = pVar->idxStackSlot;
7991 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7992 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7993 iemNativeStackCalcBpDisp(idxStackSlot));
7994 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7995 continue;
7996 }
7997
7998 case kIemNativeVarKind_Immediate:
7999 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8000 continue;
8001
8002 case kIemNativeVarKind_VarRef:
8003 {
8004 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8005 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8006 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8007 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8008 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8009 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8010 {
8011 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8012 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8013 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8014 }
8015 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8016 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8017 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8018 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8019 continue;
8020 }
8021
8022 case kIemNativeVarKind_GstRegRef:
8023 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8024 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8025 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8026 continue;
8027
8028 case kIemNativeVarKind_Invalid:
8029 case kIemNativeVarKind_End:
8030 break;
8031 }
8032 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8033 }
8034 }
8035# if 0 //def VBOX_STRICT
8036 iemNativeRegAssertSanity(pReNative);
8037# endif
8038 }
8039#else
8040 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8041#endif
8042
8043 /*
8044 * Make sure the argument variables are loaded into their respective registers.
8045 *
8046 * We can optimize this by ASSUMING that any register allocations are for
8047     * registers that have already been loaded and are ready. The previous step
8048 * saw to that.
8049 */
8050 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8051 {
8052 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8053 {
8054 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8055 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8056 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8057 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8058 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8059 else
8060 {
8061 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8062 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8063 {
8064 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8065 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8066 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8067 | RT_BIT_32(idxArgReg);
8068 pVar->idxReg = idxArgReg;
8069 }
8070 else
8071 {
8072 /* Use ARG0 as temp for stuff we need registers for. */
8073 switch (pVar->enmKind)
8074 {
8075 case kIemNativeVarKind_Stack:
8076 {
8077 uint8_t const idxStackSlot = pVar->idxStackSlot;
8078 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8079 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8080 continue;
8081 }
8082
8083 case kIemNativeVarKind_Immediate:
8084 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8085 continue;
8086
8087 case kIemNativeVarKind_VarRef:
8088 {
8089 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8090 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8091 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8092 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8093 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8094 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8095 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8096 {
8097 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8098 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8099 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8100 }
8101 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8102 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8103 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8104 continue;
8105 }
8106
8107 case kIemNativeVarKind_GstRegRef:
8108 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8109 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8110 continue;
8111
8112 case kIemNativeVarKind_Invalid:
8113 case kIemNativeVarKind_End:
8114 break;
8115 }
8116 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8117 }
8118 }
8119 }
8120#if 0 //def VBOX_STRICT
8121 iemNativeRegAssertSanity(pReNative);
8122#endif
8123 }
8124#ifdef VBOX_STRICT
8125 else
8126 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8127 {
8128 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8129 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8130 }
8131#endif
8132
8133 /*
8134 * Free all argument variables (simplified).
8135 * Their lifetime always expires with the call they are for.
8136 */
8137 /** @todo Make the python script check that arguments aren't used after
8138 * IEM_MC_CALL_XXXX. */
8139    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8140     *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8141 * an argument value. There is also some FPU stuff. */
8142 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8143 {
8144 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8145 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8146
8147 /* no need to free registers: */
8148 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8149 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8150 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8151 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8152 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8153 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8154
8155 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8156 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8157 iemNativeVarFreeStackSlots(pReNative, idxVar);
8158 }
8159 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8160
8161 /*
8162 * Flush volatile registers as we make the call.
8163 */
8164 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8165
8166 return off;
8167}
8168
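/* Illustrative caller sketch (not compiled): roughly what a CIMPL/AIMPL call emitter
   does around iemNativeEmitCallCommon, per the doc comment above - allocate the
   argument variables, let the common code place and free them, then load the hidden
   VMCPU argument and emit the call. iemNativeEmitCallImm and the helper pointer are
   assumptions/placeholders for this example. */
#if 0
{
    uint8_t const idxArg0 = iemNativeArgAllocConst(pReNative, 0 /*iArgNo*/, sizeof(uint8_t), bImm /*placeholder*/);
    off = iemNativeEmitCallCommon(pReNative, off, 2 /*cArgs, incl. hidden*/, 1 /*cHiddenArgs*/);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper /*placeholder*/);
    RT_NOREF(idxArg0);
}
#endif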
8169
8170
8171/*********************************************************************************************************************************
8172* TLB Lookup. *
8173*********************************************************************************************************************************/
8174
8175/**
8176 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8177 */
8178DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8179{
8180 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8181 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8182 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8183 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8184
8185 /* Do the lookup manually. */
8186 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8187 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8188 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8189 if (RT_LIKELY(pTlbe->uTag == uTag))
8190 {
8191 /*
8192 * Check TLB page table level access flags.
8193 */
8194 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8195 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8196 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8197 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8198 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8199 | IEMTLBE_F_PG_UNASSIGNED
8200 | IEMTLBE_F_PT_NO_ACCESSED
8201 | fNoWriteNoDirty | fNoUser);
8202 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8203 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8204 {
8205 /*
8206 * Return the address.
8207 */
8208 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8209 if ((uintptr_t)pbAddr == uResult)
8210 return;
8211 RT_NOREF(cbMem);
8212 AssertFailed();
8213 }
8214 else
8215 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8216 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8217 }
8218 else
8219 AssertFailed();
8220 RT_BREAKPOINT();
8221}
8222
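/* Illustrative sketch (not compiled): the inverse of the decoding above, i.e. how a
   caller packs uSegAndSizeAndAccess - segment register index in byte 0, access size
   in byte 1, and the IEM_ACCESS_XXX flags in the upper 16 bits. Variable names are
   for this example only. */
#if 0
{
    uint32_t const uSegAndSizeAndAccess = (uint32_t)iSegReg
                                        | ((uint32_t)cbMem << 8)
                                        | (fAccess << 16);
}
#endif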
8223/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8224
8225
8226
8227/*********************************************************************************************************************************
8228* Recompiler Core. *
8229*********************************************************************************************************************************/
8230
8231/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8232static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8233{
8234 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8235 pDis->cbCachedInstr += cbMaxRead;
8236 RT_NOREF(cbMinRead);
8237 return VERR_NO_DATA;
8238}
8239
8240
8241DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8242{
8243 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8244 {
8245#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8246 ENTRY(fLocalForcedActions),
8247 ENTRY(iem.s.rcPassUp),
8248 ENTRY(iem.s.fExec),
8249 ENTRY(iem.s.pbInstrBuf),
8250 ENTRY(iem.s.uInstrBufPc),
8251 ENTRY(iem.s.GCPhysInstrBuf),
8252 ENTRY(iem.s.cbInstrBufTotal),
8253 ENTRY(iem.s.idxTbCurInstr),
8254#ifdef VBOX_WITH_STATISTICS
8255 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8256 ENTRY(iem.s.StatNativeTlbHitsForStore),
8257 ENTRY(iem.s.StatNativeTlbHitsForStack),
8258 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8259 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8260 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8261 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8262 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8263#endif
8264 ENTRY(iem.s.DataTlb.aEntries),
8265 ENTRY(iem.s.DataTlb.uTlbRevision),
8266 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8267 ENTRY(iem.s.DataTlb.cTlbHits),
8268 ENTRY(iem.s.CodeTlb.aEntries),
8269 ENTRY(iem.s.CodeTlb.uTlbRevision),
8270 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8271 ENTRY(iem.s.CodeTlb.cTlbHits),
8272 ENTRY(pVMR3),
8273 ENTRY(cpum.GstCtx.rax),
8274 ENTRY(cpum.GstCtx.ah),
8275 ENTRY(cpum.GstCtx.rcx),
8276 ENTRY(cpum.GstCtx.ch),
8277 ENTRY(cpum.GstCtx.rdx),
8278 ENTRY(cpum.GstCtx.dh),
8279 ENTRY(cpum.GstCtx.rbx),
8280 ENTRY(cpum.GstCtx.bh),
8281 ENTRY(cpum.GstCtx.rsp),
8282 ENTRY(cpum.GstCtx.rbp),
8283 ENTRY(cpum.GstCtx.rsi),
8284 ENTRY(cpum.GstCtx.rdi),
8285 ENTRY(cpum.GstCtx.r8),
8286 ENTRY(cpum.GstCtx.r9),
8287 ENTRY(cpum.GstCtx.r10),
8288 ENTRY(cpum.GstCtx.r11),
8289 ENTRY(cpum.GstCtx.r12),
8290 ENTRY(cpum.GstCtx.r13),
8291 ENTRY(cpum.GstCtx.r14),
8292 ENTRY(cpum.GstCtx.r15),
8293 ENTRY(cpum.GstCtx.es.Sel),
8294 ENTRY(cpum.GstCtx.es.u64Base),
8295 ENTRY(cpum.GstCtx.es.u32Limit),
8296 ENTRY(cpum.GstCtx.es.Attr),
8297 ENTRY(cpum.GstCtx.cs.Sel),
8298 ENTRY(cpum.GstCtx.cs.u64Base),
8299 ENTRY(cpum.GstCtx.cs.u32Limit),
8300 ENTRY(cpum.GstCtx.cs.Attr),
8301 ENTRY(cpum.GstCtx.ss.Sel),
8302 ENTRY(cpum.GstCtx.ss.u64Base),
8303 ENTRY(cpum.GstCtx.ss.u32Limit),
8304 ENTRY(cpum.GstCtx.ss.Attr),
8305 ENTRY(cpum.GstCtx.ds.Sel),
8306 ENTRY(cpum.GstCtx.ds.u64Base),
8307 ENTRY(cpum.GstCtx.ds.u32Limit),
8308 ENTRY(cpum.GstCtx.ds.Attr),
8309 ENTRY(cpum.GstCtx.fs.Sel),
8310 ENTRY(cpum.GstCtx.fs.u64Base),
8311 ENTRY(cpum.GstCtx.fs.u32Limit),
8312 ENTRY(cpum.GstCtx.fs.Attr),
8313 ENTRY(cpum.GstCtx.gs.Sel),
8314 ENTRY(cpum.GstCtx.gs.u64Base),
8315 ENTRY(cpum.GstCtx.gs.u32Limit),
8316 ENTRY(cpum.GstCtx.gs.Attr),
8317 ENTRY(cpum.GstCtx.rip),
8318 ENTRY(cpum.GstCtx.eflags),
8319 ENTRY(cpum.GstCtx.uRipInhibitInt),
8320#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8321 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8322 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8323 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8324 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8325 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8326 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8327 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8328 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8329 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8330 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8331 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8332 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8333 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8334 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8335 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8336 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8337 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8338 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8339 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8340 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8341 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8342 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8343 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8344 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8345 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8346 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8347 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8348 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8349 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8350 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8351 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8352 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8353#endif
8354#undef ENTRY
8355 };
8356#ifdef VBOX_STRICT
8357 static bool s_fOrderChecked = false;
8358 if (!s_fOrderChecked)
8359 {
8360 s_fOrderChecked = true;
8361 uint32_t offPrev = s_aMembers[0].off;
8362 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8363 {
8364 Assert(s_aMembers[i].off > offPrev);
8365 offPrev = s_aMembers[i].off;
8366 }
8367 }
8368#endif
8369
8370 /*
8371 * Binary lookup.
8372 */
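    /* Half-open interval search: iStart is inclusive, iEnd exclusive, and the loop
       stops once the interval cannot be narrowed any further.  The VBOX_STRICT
       check above asserts the strictly ascending member order this relies on. */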
8373 unsigned iStart = 0;
8374 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8375 for (;;)
8376 {
8377 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8378 uint32_t const offCur = s_aMembers[iCur].off;
8379 if (off < offCur)
8380 {
8381 if (iCur != iStart)
8382 iEnd = iCur;
8383 else
8384 break;
8385 }
8386 else if (off > offCur)
8387 {
8388 if (iCur + 1 < iEnd)
8389 iStart = iCur + 1;
8390 else
8391 break;
8392 }
8393 else
8394 return s_aMembers[iCur].pszName;
8395 }
8396#ifdef VBOX_WITH_STATISTICS
8397 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8398 return "iem.s.acThreadedFuncStats[iFn]";
8399#endif
8400 return NULL;
8401}
8402
8403
8404/**
8405 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
8406 * @returns pszBuf.
8407 * @param fFlags The flags.
8408 * @param pszBuf The output buffer.
8409 * @param cbBuf The output buffer size. At least 32 bytes.
8410 */
8411DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
8412{
8413 Assert(cbBuf >= 32);
8414 static RTSTRTUPLE const s_aModes[] =
8415 {
8416 /* [00] = */ { RT_STR_TUPLE("16BIT") },
8417 /* [01] = */ { RT_STR_TUPLE("32BIT") },
8418 /* [02] = */ { RT_STR_TUPLE("!2!") },
8419 /* [03] = */ { RT_STR_TUPLE("!3!") },
8420 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
8421 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
8422 /* [06] = */ { RT_STR_TUPLE("!6!") },
8423 /* [07] = */ { RT_STR_TUPLE("!7!") },
8424 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
8425 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
8426 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
8427 /* [0b] = */ { RT_STR_TUPLE("!b!") },
8428 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
8429 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
8430 /* [0e] = */ { RT_STR_TUPLE("!e!") },
8431 /* [0f] = */ { RT_STR_TUPLE("!f!") },
8432 /* [10] = */ { RT_STR_TUPLE("!10!") },
8433 /* [11] = */ { RT_STR_TUPLE("!11!") },
8434 /* [12] = */ { RT_STR_TUPLE("!12!") },
8435 /* [13] = */ { RT_STR_TUPLE("!13!") },
8436 /* [14] = */ { RT_STR_TUPLE("!14!") },
8437 /* [15] = */ { RT_STR_TUPLE("!15!") },
8438 /* [16] = */ { RT_STR_TUPLE("!16!") },
8439 /* [17] = */ { RT_STR_TUPLE("!17!") },
8440 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
8441 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
8442 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
8443 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
8444 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
8445 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
8446 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
8447 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
8448 };
8449 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
8450 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
8451 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
8452
8453 pszBuf[off++] = ' ';
8454 pszBuf[off++] = 'C';
8455 pszBuf[off++] = 'P';
8456 pszBuf[off++] = 'L';
8457 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
8458 Assert(off < 32);
8459
8460 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
8461
8462 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
8463 {
8464 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
8465 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
8466 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
8467 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
8468 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
8469 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
8470 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
8471 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
8472 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
8473 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
8474 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
8475 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
8476 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
8477 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
8478 };
8479 if (fFlags)
8480 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
8481 if (s_aFlags[i].fFlag & fFlags)
8482 {
8483 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
8484 pszBuf[off++] = ' ';
8485 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
8486 off += s_aFlags[i].cchName;
8487 fFlags &= ~s_aFlags[i].fFlag;
8488 if (!fFlags)
8489 break;
8490 }
8491 pszBuf[off] = '\0';
8492
8493 return pszBuf;
8494}
8495
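/* Illustrative usage of iemTbFlagsToString() (not taken from this file); the
   buffer must be at least 32 bytes, larger if many flags can be set:

       char szTmp[64];
       Log(("TB flags: %s\n", iemTbFlagsToString(pTb->fFlags, szTmp, sizeof(szTmp))));

   For a ring-0 long mode native TB this yields something along the lines of
   "64BIT CPL0 TYPE_NATIVE". */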
8496
8497DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8498{
8499 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8500#if defined(RT_ARCH_AMD64)
8501 static const char * const a_apszMarkers[] =
8502 {
8503 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8504 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8505 };
8506#endif
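    /* Note: on AMD64, iemNativeEmitMarker() emits a 7 byte NOP whose last four bytes
       hold a uInfo value that is decoded further down: a high word below
       kIemThreadedFunc_End identifies a threaded call (low 15 bits = call number,
       bit 15 = recompiled), otherwise the value (bit 31 ignored) indexes a_apszMarkers. */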
8507
8508 char szDisBuf[512];
8509 DISSTATE Dis;
8510 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8511 uint32_t const cNative = pTb->Native.cInstructions;
8512 uint32_t offNative = 0;
8513#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8514 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8515#endif
8516 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8517 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8518 : DISCPUMODE_64BIT;
8519#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8520 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8521#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8522 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8523#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8524# error "Port me"
8525#else
8526 csh hDisasm = ~(size_t)0;
8527# if defined(RT_ARCH_AMD64)
8528 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8529# elif defined(RT_ARCH_ARM64)
8530 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8531# else
8532# error "Port me"
8533# endif
8534 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8535
8536 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8537 //Assert(rcCs == CS_ERR_OK);
8538#endif
8539
8540 /*
8541 * Print TB info.
8542 */
8543 pHlp->pfnPrintf(pHlp,
8544 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8545 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8546 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8547 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8548#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8549 if (pDbgInfo && pDbgInfo->cEntries > 1)
8550 {
8551 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8552
8553 /*
8554         * This disassembly is driven by the debug info, which follows the native
8555         * code and indicates where the code for the next guest instruction starts,
8556         * where labels are, and such things.
8557 */
8558 uint32_t idxThreadedCall = 0;
8559 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8560 uint8_t idxRange = UINT8_MAX;
8561 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8562 uint32_t offRange = 0;
8563 uint32_t offOpcodes = 0;
8564 uint32_t const cbOpcodes = pTb->cbOpcodes;
8565 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8566 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8567 uint32_t iDbgEntry = 1;
8568 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8569
8570 while (offNative < cNative)
8571 {
8572 /* If we're at or have passed the point where the next chunk of debug
8573 info starts, process it. */
8574 if (offDbgNativeNext <= offNative)
8575 {
8576 offDbgNativeNext = UINT32_MAX;
8577 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8578 {
8579 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8580 {
8581 case kIemTbDbgEntryType_GuestInstruction:
8582 {
8583 /* Did the exec flag change? */
8584 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8585 {
8586 pHlp->pfnPrintf(pHlp,
8587 " fExec change %#08x -> %#08x %s\n",
8588 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8589 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8590 szDisBuf, sizeof(szDisBuf)));
8591 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8592 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8593 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8594 : DISCPUMODE_64BIT;
8595 }
8596
8597                            /* New opcode range? We need to fend off a spurious debug info entry here for cases
8598 where the compilation was aborted before the opcode was recorded and the actual
8599 instruction was translated to a threaded call. This may happen when we run out
8600 of ranges, or when some complicated interrupts/FFs are found to be pending or
8601 similar. So, we just deal with it here rather than in the compiler code as it
8602 is a lot simpler to do here. */
8603 if ( idxRange == UINT8_MAX
8604 || idxRange >= cRanges
8605 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8606 {
8607 idxRange += 1;
8608 if (idxRange < cRanges)
8609 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8610 else
8611 continue;
8612 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8613 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8614 + (pTb->aRanges[idxRange].idxPhysPage == 0
8615 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8616 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8617 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8618 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8619 pTb->aRanges[idxRange].idxPhysPage);
8620 GCPhysPc += offRange;
8621 }
8622
8623 /* Disassemble the instruction. */
8624 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8625 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8626 uint32_t cbInstr = 1;
8627 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8628 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8629 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8630 if (RT_SUCCESS(rc))
8631 {
8632 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8633 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8634 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8635 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8636
8637 static unsigned const s_offMarker = 55;
8638 static char const s_szMarker[] = " ; <--- guest";
8639 if (cch < s_offMarker)
8640 {
8641 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8642 cch = s_offMarker;
8643 }
8644 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8645 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8646
8647 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8648 }
8649 else
8650 {
8651 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8652 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8653 cbInstr = 1;
8654 }
8655 GCPhysPc += cbInstr;
8656 offOpcodes += cbInstr;
8657 offRange += cbInstr;
8658 continue;
8659 }
8660
8661 case kIemTbDbgEntryType_ThreadedCall:
8662 pHlp->pfnPrintf(pHlp,
8663 " Call #%u to %s (%u args) - %s\n",
8664 idxThreadedCall,
8665 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8666 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8667 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8668 idxThreadedCall++;
8669 continue;
8670
8671 case kIemTbDbgEntryType_GuestRegShadowing:
8672 {
8673 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8674 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8675 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8676 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8677 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8678 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8679 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
8680 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8681 else
8682 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8683 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8684 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8685 continue;
8686 }
8687
8688#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8689 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8690 {
8691 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8692 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8693 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8694 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8695 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8696 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8697 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8698 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8699 else
8700 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8701 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8702 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8703 continue;
8704 }
8705#endif
8706
8707 case kIemTbDbgEntryType_Label:
8708 {
8709 const char *pszName = "what_the_fudge";
8710 const char *pszComment = "";
8711 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8712 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8713 {
8714 case kIemNativeLabelType_Return:
8715 pszName = "Return";
8716 break;
8717 case kIemNativeLabelType_ReturnBreak:
8718 pszName = "ReturnBreak";
8719 break;
8720 case kIemNativeLabelType_ReturnWithFlags:
8721 pszName = "ReturnWithFlags";
8722 break;
8723 case kIemNativeLabelType_NonZeroRetOrPassUp:
8724 pszName = "NonZeroRetOrPassUp";
8725 break;
8726 case kIemNativeLabelType_RaiseGp0:
8727 pszName = "RaiseGp0";
8728 break;
8729 case kIemNativeLabelType_RaiseNm:
8730 pszName = "RaiseNm";
8731 break;
8732 case kIemNativeLabelType_RaiseUd:
8733 pszName = "RaiseUd";
8734 break;
8735 case kIemNativeLabelType_RaiseMf:
8736 pszName = "RaiseMf";
8737 break;
8738 case kIemNativeLabelType_RaiseXf:
8739 pszName = "RaiseXf";
8740 break;
8741 case kIemNativeLabelType_ObsoleteTb:
8742 pszName = "ObsoleteTb";
8743 break;
8744 case kIemNativeLabelType_NeedCsLimChecking:
8745 pszName = "NeedCsLimChecking";
8746 break;
8747 case kIemNativeLabelType_CheckBranchMiss:
8748 pszName = "CheckBranchMiss";
8749 break;
8750 case kIemNativeLabelType_If:
8751 pszName = "If";
8752 fNumbered = true;
8753 break;
8754 case kIemNativeLabelType_Else:
8755 pszName = "Else";
8756 fNumbered = true;
8757 pszComment = " ; regs state restored pre-if-block";
8758 break;
8759 case kIemNativeLabelType_Endif:
8760 pszName = "Endif";
8761 fNumbered = true;
8762 break;
8763 case kIemNativeLabelType_CheckIrq:
8764 pszName = "CheckIrq_CheckVM";
8765 fNumbered = true;
8766 break;
8767 case kIemNativeLabelType_TlbLookup:
8768 pszName = "TlbLookup";
8769 fNumbered = true;
8770 break;
8771 case kIemNativeLabelType_TlbMiss:
8772 pszName = "TlbMiss";
8773 fNumbered = true;
8774 break;
8775 case kIemNativeLabelType_TlbDone:
8776 pszName = "TlbDone";
8777 fNumbered = true;
8778 break;
8779 case kIemNativeLabelType_Invalid:
8780 case kIemNativeLabelType_End:
8781 break;
8782 }
8783 if (fNumbered)
8784 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8785 else
8786 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8787 continue;
8788 }
8789
8790 case kIemTbDbgEntryType_NativeOffset:
8791 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8792 Assert(offDbgNativeNext > offNative);
8793 break;
8794
8795#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8796 case kIemTbDbgEntryType_DelayedPcUpdate:
8797 pHlp->pfnPrintf(pHlp,
8798 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8799 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8800 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8801 continue;
8802#endif
8803
8804 default:
8805 AssertFailed();
8806 }
8807 iDbgEntry++;
8808 break;
8809 }
8810 }
8811
8812 /*
8813 * Disassemble the next native instruction.
8814 */
8815 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8816# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8817 uint32_t cbInstr = sizeof(paNative[0]);
8818 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8819 if (RT_SUCCESS(rc))
8820 {
8821# if defined(RT_ARCH_AMD64)
8822 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8823 {
8824 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8825 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8826 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8827 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8828 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8829 uInfo & 0x8000 ? "recompiled" : "todo");
8830 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8831 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8832 else
8833 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8834 }
8835 else
8836# endif
8837 {
8838 const char *pszAnnotation = NULL;
8839# ifdef RT_ARCH_AMD64
8840 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8841 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8842 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8843 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8844 PCDISOPPARAM pMemOp;
8845 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8846 pMemOp = &Dis.Param1;
8847 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8848 pMemOp = &Dis.Param2;
8849 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8850 pMemOp = &Dis.Param3;
8851 else
8852 pMemOp = NULL;
8853 if ( pMemOp
8854 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8855 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8856 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8857 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8858
8859#elif defined(RT_ARCH_ARM64)
8860 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8861 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8862 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8863# else
8864# error "Port me"
8865# endif
8866 if (pszAnnotation)
8867 {
8868 static unsigned const s_offAnnotation = 55;
8869 size_t const cchAnnotation = strlen(pszAnnotation);
8870 size_t cchDis = strlen(szDisBuf);
8871 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8872 {
8873 if (cchDis < s_offAnnotation)
8874 {
8875 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8876 cchDis = s_offAnnotation;
8877 }
8878 szDisBuf[cchDis++] = ' ';
8879 szDisBuf[cchDis++] = ';';
8880 szDisBuf[cchDis++] = ' ';
8881 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8882 }
8883 }
8884 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8885 }
8886 }
8887 else
8888 {
8889# if defined(RT_ARCH_AMD64)
8890 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8891 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8892# elif defined(RT_ARCH_ARM64)
8893 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8894# else
8895# error "Port me"
8896# endif
8897 cbInstr = sizeof(paNative[0]);
8898 }
8899 offNative += cbInstr / sizeof(paNative[0]);
8900
8901# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8902 cs_insn *pInstr;
8903 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8904 (uintptr_t)pNativeCur, 1, &pInstr);
8905 if (cInstrs > 0)
8906 {
8907 Assert(cInstrs == 1);
8908 const char *pszAnnotation = NULL;
8909# if defined(RT_ARCH_ARM64)
8910 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8911 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8912 {
8913                        /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8914 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
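                    /* Illustrative example of an op_str we may be looking at here (offset made up):
                           "w9, [x28, #0x123]"
                       x28 is the fixed pVCpu register (see the AssertCompile above), so the
                       displacement can be mapped to a VMCPUCC member name below via
                       iemNativeDbgVCpuOffsetToName(). */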
8915 char *psz = strchr(pInstr->op_str, '[');
8916 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8917 {
8918                            uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8919 int32_t off = -1;
8920 psz += 4;
8921 if (*psz == ']')
8922 off = 0;
8923 else if (*psz == ',')
8924 {
8925 psz = RTStrStripL(psz + 1);
8926 if (*psz == '#')
8927 off = RTStrToInt32(&psz[1]);
8928 /** @todo deal with index registers and LSL as well... */
8929 }
8930 if (off >= 0)
8931 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8932 }
8933 }
8934# endif
8935
8936 size_t const cchOp = strlen(pInstr->op_str);
8937# if defined(RT_ARCH_AMD64)
8938 if (pszAnnotation)
8939 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8940 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8941 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8942 else
8943 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8944 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8945
8946# else
8947 if (pszAnnotation)
8948 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8949 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8950 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8951 else
8952 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8953 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8954# endif
8955 offNative += pInstr->size / sizeof(*pNativeCur);
8956 cs_free(pInstr, cInstrs);
8957 }
8958 else
8959 {
8960# if defined(RT_ARCH_AMD64)
8961 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8962                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8963# else
8964 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8965# endif
8966 offNative++;
8967 }
8968# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8969 }
8970 }
8971 else
8972#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8973 {
8974 /*
8975 * No debug info, just disassemble the x86 code and then the native code.
8976 *
8977 * First the guest code:
8978 */
8979 for (unsigned i = 0; i < pTb->cRanges; i++)
8980 {
8981 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8982 + (pTb->aRanges[i].idxPhysPage == 0
8983 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8984 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8985 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8986 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8987 unsigned off = pTb->aRanges[i].offOpcodes;
8988 /** @todo this ain't working when crossing pages! */
8989 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8990 while (off < cbOpcodes)
8991 {
8992 uint32_t cbInstr = 1;
8993 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8994 &pTb->pabOpcodes[off], cbOpcodes - off,
8995 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8996 if (RT_SUCCESS(rc))
8997 {
8998 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8999 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9000 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9001 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9002 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9003 GCPhysPc += cbInstr;
9004 off += cbInstr;
9005 }
9006 else
9007 {
9008 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9009 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9010 break;
9011 }
9012 }
9013 }
9014
9015 /*
9016 * Then the native code:
9017 */
9018 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9019 while (offNative < cNative)
9020 {
9021 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9022# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9023 uint32_t cbInstr = sizeof(paNative[0]);
9024 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9025 if (RT_SUCCESS(rc))
9026 {
9027# if defined(RT_ARCH_AMD64)
9028 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9029 {
9030 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9031 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9032 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9033 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9034 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9035 uInfo & 0x8000 ? "recompiled" : "todo");
9036 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9037 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9038 else
9039 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9040 }
9041 else
9042# endif
9043 {
9044# ifdef RT_ARCH_AMD64
9045 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9046 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9047 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9048 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9049# elif defined(RT_ARCH_ARM64)
9050 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9051 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9052 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9053# else
9054# error "Port me"
9055# endif
9056 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9057 }
9058 }
9059 else
9060 {
9061# if defined(RT_ARCH_AMD64)
9062 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9063 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9064# else
9065 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9066# endif
9067 cbInstr = sizeof(paNative[0]);
9068 }
9069 offNative += cbInstr / sizeof(paNative[0]);
9070
9071# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9072 cs_insn *pInstr;
9073 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9074 (uintptr_t)pNativeCur, 1, &pInstr);
9075 if (cInstrs > 0)
9076 {
9077 Assert(cInstrs == 1);
9078# if defined(RT_ARCH_AMD64)
9079 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9080 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9081# else
9082 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9083 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9084# endif
9085 offNative += pInstr->size / sizeof(*pNativeCur);
9086 cs_free(pInstr, cInstrs);
9087 }
9088 else
9089 {
9090# if defined(RT_ARCH_AMD64)
9091 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9092                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9093# else
9094 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9095# endif
9096 offNative++;
9097 }
9098# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9099 }
9100 }
9101
9102#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9103 /* Cleanup. */
9104 cs_close(&hDisasm);
9105#endif
9106}
9107
9108
9109/**
9110 * Recompiles the given threaded TB into a native one.
9111 *
9112 * In case of failure the translation block will be returned as-is.
9113 *
9114 * @returns pTb.
9115 * @param pVCpu The cross context virtual CPU structure of the calling
9116 * thread.
9117 * @param pTb The threaded translation to recompile to native.
9118 */
9119DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9120{
9121 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9122
9123 /*
9124     * The first time thru, we allocate the recompiler state; the other times
9125 * we just need to reset it before using it again.
9126 */
9127 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9128 if (RT_LIKELY(pReNative))
9129 iemNativeReInit(pReNative, pTb);
9130 else
9131 {
9132 pReNative = iemNativeInit(pVCpu, pTb);
9133 AssertReturn(pReNative, pTb);
9134 }
9135
9136#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9137 /*
9138 * First do liveness analysis. This is done backwards.
9139 */
9140 {
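        /* The state needed before call N depends only on what calls N..end do with
           each register, so the final entry below is seeded as 'unused' and the
           per-call liveness functions then propagate the state backwards, one call
           at a time. */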
9141 uint32_t idxCall = pTb->Thrd.cCalls;
9142 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9143 { /* likely */ }
9144 else
9145 {
9146 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9147 while (idxCall > cAlloc)
9148 cAlloc *= 2;
9149 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9150 AssertReturn(pvNew, pTb);
9151 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9152 pReNative->cLivenessEntriesAlloc = cAlloc;
9153 }
9154 AssertReturn(idxCall > 0, pTb);
9155 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9156
9157 /* The initial (final) entry. */
9158 idxCall--;
9159 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9160
9161 /* Loop backwards thru the calls and fill in the other entries. */
9162 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9163 while (idxCall > 0)
9164 {
9165 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9166 if (pfnLiveness)
9167 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9168 else
9169 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9170 pCallEntry--;
9171 idxCall--;
9172 }
9173
9174# ifdef VBOX_WITH_STATISTICS
9175        /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
9176           to 'clobbered' rather than 'input'. */
9177 /** @todo */
9178# endif
9179 }
9180#endif
9181
9182 /*
9183 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9184 * for aborting if an error happens.
9185 */
9186 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9187#ifdef LOG_ENABLED
9188 uint32_t const cCallsOrg = cCallsLeft;
9189#endif
9190 uint32_t off = 0;
9191 int rc = VINF_SUCCESS;
9192 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9193 {
9194 /*
9195 * Emit prolog code (fixed).
9196 */
9197 off = iemNativeEmitProlog(pReNative, off);
9198
9199 /*
9200 * Convert the calls to native code.
9201 */
9202#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9203 int32_t iGstInstr = -1;
9204#endif
9205#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9206 uint32_t cThreadedCalls = 0;
9207 uint32_t cRecompiledCalls = 0;
9208#endif
9209#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9210 uint32_t idxCurCall = 0;
9211#endif
9212 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9213 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9214 while (cCallsLeft-- > 0)
9215 {
9216 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9217#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9218 pReNative->idxCurCall = idxCurCall;
9219#endif
9220
9221 /*
9222 * Debug info, assembly markup and statistics.
9223 */
9224#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9225 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9226 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9227#endif
9228#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9229 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9230 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9231 {
9232 if (iGstInstr < (int32_t)pTb->cInstructions)
9233 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9234 else
9235 Assert(iGstInstr == pTb->cInstructions);
9236 iGstInstr = pCallEntry->idxInstr;
9237 }
9238 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9239#endif
9240#if defined(VBOX_STRICT)
9241 off = iemNativeEmitMarker(pReNative, off,
9242 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9243#endif
9244#if defined(VBOX_STRICT)
9245 iemNativeRegAssertSanity(pReNative);
9246#endif
9247#ifdef VBOX_WITH_STATISTICS
9248 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9249#endif
9250
9251 /*
9252 * Actual work.
9253 */
9254 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9255 pfnRecom ? "(recompiled)" : "(todo)"));
9256 if (pfnRecom) /** @todo stats on this. */
9257 {
9258 off = pfnRecom(pReNative, off, pCallEntry);
9259 STAM_REL_STATS({cRecompiledCalls++;});
9260 }
9261 else
9262 {
9263 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9264 STAM_REL_STATS({cThreadedCalls++;});
9265 }
9266 Assert(off <= pReNative->cInstrBufAlloc);
9267 Assert(pReNative->cCondDepth == 0);
9268
9269#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9270 if (LogIs2Enabled())
9271 {
9272 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9273# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9274 static const char s_achState[] = "CUXI";
9275# else
9276 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9277# endif
9278
9279 char szGpr[17];
9280 for (unsigned i = 0; i < 16; i++)
9281 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9282 szGpr[16] = '\0';
9283
9284 char szSegBase[X86_SREG_COUNT + 1];
9285 char szSegLimit[X86_SREG_COUNT + 1];
9286 char szSegAttrib[X86_SREG_COUNT + 1];
9287 char szSegSel[X86_SREG_COUNT + 1];
9288 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9289 {
9290 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9291 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9292 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9293 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9294 }
9295 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9296 = szSegSel[X86_SREG_COUNT] = '\0';
9297
9298 char szEFlags[8];
9299 for (unsigned i = 0; i < 7; i++)
9300 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9301 szEFlags[7] = '\0';
9302
9303 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9304 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9305 }
9306#endif
9307
9308 /*
9309 * Advance.
9310 */
9311 pCallEntry++;
9312#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9313 idxCurCall++;
9314#endif
9315 }
9316
9317 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9318 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9319 if (!cThreadedCalls)
9320 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9321
9322 /*
9323 * Emit the epilog code.
9324 */
9325 uint32_t idxReturnLabel;
9326 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9327
9328 /*
9329 * Generate special jump labels.
9330 */
9331 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9332 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9333 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9334 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9335 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
9336 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
9337 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
9338 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
9339 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
9340 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
9341 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
9342 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
9343 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
9344 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
9345 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
9346 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
9347 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
9348 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
9349 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
9350 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
9351 }
9352 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9353 {
9354 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9355 return pTb;
9356 }
9357 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9358 Assert(off <= pReNative->cInstrBufAlloc);
9359
9360 /*
9361     * Make sure all labels have been defined.
9362 */
9363 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9364#ifdef VBOX_STRICT
9365 uint32_t const cLabels = pReNative->cLabels;
9366 for (uint32_t i = 0; i < cLabels; i++)
9367 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9368#endif
9369
9370 /*
9371 * Allocate executable memory, copy over the code we've generated.
9372 */
9373 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9374 if (pTbAllocator->pDelayedFreeHead)
9375 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9376
9377 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9378 AssertReturn(paFinalInstrBuf, pTb);
9379 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9380
9381 /*
9382 * Apply fixups.
9383 */
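    /* Each fixup patches the already copied instruction at paFinalInstrBuf[.off] so
       that it refers to the final offset of its label; offAddend lets the emitter
       adjust for where in the instruction the displacement is counted from. */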
9384 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9385 uint32_t const cFixups = pReNative->cFixups;
9386 for (uint32_t i = 0; i < cFixups; i++)
9387 {
9388 Assert(paFixups[i].off < off);
9389 Assert(paFixups[i].idxLabel < cLabels);
9390 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9391 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9392 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9393 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9394 switch (paFixups[i].enmType)
9395 {
9396#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9397 case kIemNativeFixupType_Rel32:
9398 Assert(paFixups[i].off + 4 <= off);
9399 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9400 continue;
9401
9402#elif defined(RT_ARCH_ARM64)
9403 case kIemNativeFixupType_RelImm26At0:
9404 {
9405 Assert(paFixups[i].off < off);
9406 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9407 Assert(offDisp >= -262144 && offDisp < 262144);
9408 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9409 continue;
9410 }
9411
9412 case kIemNativeFixupType_RelImm19At5:
9413 {
9414 Assert(paFixups[i].off < off);
9415 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9416 Assert(offDisp >= -262144 && offDisp < 262144);
9417 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9418 continue;
9419 }
9420
9421 case kIemNativeFixupType_RelImm14At5:
9422 {
9423 Assert(paFixups[i].off < off);
9424 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9425 Assert(offDisp >= -8192 && offDisp < 8192);
9426 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9427 continue;
9428 }
9429
9430#endif
9431 case kIemNativeFixupType_Invalid:
9432 case kIemNativeFixupType_End:
9433 break;
9434 }
9435 AssertFailed();
9436 }
9437
9438 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9439 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9440
9441 /*
9442 * Convert the translation block.
9443 */
9444 RTMemFree(pTb->Thrd.paCalls);
9445 pTb->Native.paInstructions = paFinalInstrBuf;
9446 pTb->Native.cInstructions = off;
9447 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9448#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9449    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9450 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9451#endif
9452
9453 Assert(pTbAllocator->cThreadedTbs > 0);
9454 pTbAllocator->cThreadedTbs -= 1;
9455 pTbAllocator->cNativeTbs += 1;
9456 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9457
9458#ifdef LOG_ENABLED
9459 /*
9460 * Disassemble to the log if enabled.
9461 */
9462 if (LogIs3Enabled())
9463 {
9464 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9465 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9466# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9467 RTLogFlush(NULL);
9468# endif
9469 }
9470#endif
9471 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9472
9473 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9474 return pTb;
9475}
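/* Illustrative call site (not from this file) for turning a threaded TB into a
   native one once the caller decides to recompile it:

       pTb = iemNativeRecompile(pVCpu, pTb);

   On failure the threaded TB is returned unmodified, so the caller can simply
   keep executing it in threaded mode. */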
9476