VirtualBox source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp @ r103913

Last change: r103913, checked in by vboxsync on 2024-03-19:
VMM/IEM: Implement support for checking the high 128-bit register against the value stored in CPUMCTX for amd64, bugref:10614

1/* $Id: IEMAllN8veRecompiler.cpp 103913 2024-03-19 11:47:09Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused ones.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
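
/* Editorial note (worked example, not from the original source): the shift is
 * simply log2 of the unit size (2^7 = 128). A request is rounded up to whole
 * units before the allocation bitmap is consulted, e.g.:
 *     cbReq = 200  -> cReqUnits = (200 + 127) >> 7 = 2  (256 bytes)
 *     cbReq = 1000 -> cReqUnits = (1000 + 127) >> 7 = 8  (1024 bytes)
 * so each bit in a chunk's allocation bitmap covers 128 bytes. */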
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity/laziness, they are allocated as one continuous
339 * block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits of consecutive clear bits.
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
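
/* Editorial sketch (illustration only, not from the original source): the
 * function above is a first-fit scan over the allocation bitmap, where a set
 * bit means "128-byte unit in use". With cReqUnits = 3 and bitmap bits
 * (LSB first) 1 1 0 0 1 1 1 0 0 0 1 ..., ASMBitFirstClear returns bit 2, the
 * inner loop stops at bit 4 after a run of only two clear bits,
 * ASMBitNextClear then resumes after bit 3 and lands on bit 7, and bits 7..9
 * form a free run of three units which is claimed with ASMBitSetRange. */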
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
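
/* Editorial note (summary, not from the original source): the allocation
 * strategy above is, in order:
 *   1. round cbReq up to the allocator's granularity,
 *   2. try the hinted chunk, then the chunks after it, then the ones before it,
 *   3. grow by one chunk (if cChunks < cMaxChunks) and retry in the new chunk,
 *   4. prune native TBs once via iemTbAllocatorFreeupNativeSpace and take a
 *      second pass before finally returning NULL. */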
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
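
/* Editorial sketch (hypothetical usage, not part of the original source): on
 * darwin, where RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive,
 * the intended sequence for producing executable code is roughly:
 *
 *     void *pv = iemExecMemAllocatorAlloc(pVCpu, cbNeeded);  // pages are RW here
 *     // ... emit the native instructions into pv ...
 *     iemExecMemAllocatorReadyForUse(pVCpu, pv, cbNeeded);   // flip to RX + icache flush
 *     // ... execute the code; eventually:
 *     iemExecMemAllocatorFree(pVCpu, pv, cbNeeded);
 *
 * where cbNeeded stands in for the size of the emitted code (a NULL check on
 * pv is of course required in real code). */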
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one,
695 * but mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
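
/* Editorial note (not from the original source): a single RUNTIME_FUNCTION
 * entry deliberately covers the whole chunk (BeginAddress 0 up to cbChunk)
 * with pvChunk acting as the "image base", so every translation block
 * allocated from the chunk shares this one unwind program. Since
 * UnwindInfoAddress is an RVA relative to that base, the unwind data itself
 * has to live inside the chunk, which is why it is allocated from the
 * executable heap above. */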
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
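
/* Editorial note (worked encodings, not from the original source), using the
 * helpers above:
 *     iemDwarfPutLeb128(Ptr, 1)    -> 0x01          (small positive)
 *     iemDwarfPutLeb128(Ptr, -8)   -> 0x78          (small negative, sign bit 0x40 set)
 *     iemDwarfPutLeb128(Ptr, 300)  -> 0xac 0x02     (two bytes, continuation bit on the first)
 *     iemDwarfPutUleb128(Ptr, 300) -> 0xac 0x02
 *     iemDwarfPutUleb128(Ptr, 16)  -> 0x10
 * These match the DWARF (S)LEB128 definitions for the limited value ranges
 * the emitters above support. */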
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
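
/* Editorial note (worked example, not from the original source): for the
 * AMD64 CIE generated below, iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16)
 * emits DW_CFA_def_cfa followed by ULEB128 0x06 and 0x10 (assuming
 * DWREG_AMD64_RBP is DWARF register 6), i.e. "CFA = RBP + 16"; and
 * iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits the compact form
 * (DW_CFA_offset | 6) followed by ULEB128 2, i.e. "RBP is saved at
 * CFA + 2 * data_alignment_factor" = CFA - 16. */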
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
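
/* Editorial note (illustration, not from the original source): the macros
 * above build szzStrTab as a sequence of zero-terminated names while keeping
 * offStrTab pointing at the next free byte. After appending "", ".eh_frame"
 * and ".shstrtab" the table holds "\0.eh_frame\0.shstrtab\0" and offStrTab is
 * 21 (1 + 10 + 10), which is the sh_name value the next section header will
 * record for its name. */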
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we have room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
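
/* Editorial note (not from the original source): a bitmap convention worth
 * calling out: a chunk's bitmap region is cleared to all-zero (every unit
 * free) only when the chunk is actually added here, while the bitmaps of
 * not-yet-added chunks are left at 0xff (every unit "in use") by
 * iemExecMemAllocatorInit, presumably as a safety net so a stray scan of an
 * unused bitmap can never report free space. */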
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
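    /* Editor's note: illustrative sketch, not part of the original source.  The header, the
     * allocation bitmaps and (for ring-3, non-Windows) the eh_frame buffers above all share a
     * single allocation, with each trailing array placed at a cache-line aligned offset computed
     * before the RTMemAllocZ call.  A self-contained C++ model of that layout pattern, with
     * hypothetical sizes and a stand-in Header type:
     * @code
     *  #include <cassert>
     *  #include <cstdint>
     *  #include <cstdlib>
     *
     *  // Round cb up to the next multiple of cbAlign (cbAlign must be a power of two).
     *  static size_t alignUp(size_t cb, size_t cbAlign) { return (cb + cbAlign - 1) & ~(cbAlign - 1); }
     *
     *  struct Header
     *  {
     *      uint32_t  cMaxChunks;
     *      uint64_t *pbmAlloc;   // points into the same allocation, after the header
     *  };
     *
     *  int main()
     *  {
     *      size_t const cMaxChunks       = 8;    // hypothetical figures
     *      size_t const cbBitmapPerChunk = 512;
     *      size_t const cbCacheLine      = 64;
     *
     *      size_t       cbNeeded   = sizeof(Header);
     *      size_t const offBitmaps = alignUp(cbNeeded, cbCacheLine);
     *      cbNeeded = offBitmaps + cbBitmapPerChunk * cMaxChunks;
     *
     *      Header *pHdr = (Header *)std::calloc(1, cbNeeded);
     *      assert(pHdr);
     *      pHdr->cMaxChunks = (uint32_t)cMaxChunks;
     *      pHdr->pbmAlloc   = (uint64_t *)((uintptr_t)pHdr + offBitmaps); // cache-line aligned trailing array
     *      std::free(pHdr);
     *      return 0;
     *  }
     * @endcode */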
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
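/* Editor's note: illustrative sketch, not part of the original source.  With
 * IEMEXECMEM_USE_ALT_SUB_ALLOCATOR the per-chunk allocation state lives in a bitmap with one bit
 * per allocation unit, packed 64 units per uint64_t element.  iemExecMemAllocatorInit marks every
 * bit as allocated up front and a chunk's slice is only cleared once that chunk is actually set up
 * (see the chunk setup code earlier in this file).  A minimal standalone model of that bookkeeping,
 * with a hypothetical unit count:
 * @code
 *  #include <bit>
 *  #include <cassert>
 *  #include <cstdint>
 *  #include <cstring>
 *  #include <vector>
 *
 *  int main()
 *  {
 *      unsigned const cMaxChunks     = 4;
 *      unsigned const cUnitsPerChunk = 256;                 // hypothetical figure
 *      unsigned const cElemsPerChunk = cUnitsPerChunk / 64; // 64 units per uint64_t element
 *      std::vector<uint64_t> bmAlloc(cMaxChunks * cElemsPerChunk);
 *
 *      // Mark everything as allocated, like the memset(..., 0xff, ...) in the init code.
 *      std::memset(bmAlloc.data(), 0xff, bmAlloc.size() * sizeof(uint64_t));
 *
 *      // When a chunk is added, clear its slice of the bitmap so its units become allocatable.
 *      unsigned const idxChunk = 1;
 *      std::memset(&bmAlloc[idxChunk * cElemsPerChunk], 0, cElemsPerChunk * sizeof(uint64_t));
 *
 *      // Free units in a chunk = number of zero bits in its slice.
 *      unsigned cFreeUnits = 0;
 *      for (unsigned i = 0; i < cElemsPerChunk; i++)
 *          cFreeUnits += 64 - std::popcount(bmAlloc[idxChunk * cElemsPerChunk + i]);
 *      assert(cFreeUnits == cUnitsPerChunk);
 *      return 0;
 *  }
 * @endcode */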
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#NM.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDeviceNotAvailableJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when it wants to raise a \#GP(0).
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1657{
1658 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1659#ifndef _MSC_VER
1660 return VINF_IEM_RAISED_XCPT; /* not reached */
1661#endif
1662}
1663
1664
1665/**
1666 * Used by TB code when it wants to raise a \#MF.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1669{
1670 iemRaiseMathFaultJmp(pVCpu);
1671#ifndef _MSC_VER
1672 return VINF_IEM_RAISED_XCPT; /* not reached */
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code when it wants to raise a \#XF.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1681{
1682 iemRaiseSimdFpExceptionJmp(pVCpu);
1683#ifndef _MSC_VER
1684 return VINF_IEM_RAISED_XCPT; /* not reached */
1685#endif
1686}
1687
1688
1689/**
1690 * Used by TB code when detecting opcode changes.
1691 * @see iemThreadeFuncWorkerObsoleteTb
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1694{
1695    /* We set fSafeToFree to false because we're being called in the context
1696       of a TB callback function, which for native TBs means we cannot release
1697       the executable memory until we've returned all the way back to iemTbExec,
1698       as that return path goes via the native code generated for the TB. */
1699 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1700 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1701 return VINF_IEM_REEXEC_BREAK;
1702}
1703
1704
1705/**
1706 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1707 */
1708IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1709{
1710 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1711 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1712 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1713 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1714 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1715 return VINF_IEM_REEXEC_BREAK;
1716}
1717
1718
1719/**
1720 * Used by TB code when we missed a PC check after a branch.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1723{
1724 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1725 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1726 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1727 pVCpu->iem.s.pbInstrBuf));
1728 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1729 return VINF_IEM_REEXEC_BREAK;
1730}
1731
1732
1733
1734/*********************************************************************************************************************************
1735* Helpers: Segmented memory fetches and stores. *
1736*********************************************************************************************************************************/
1737
1738/**
1739 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1753 * to 16 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
1764
1765/**
1766 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1767 * to 32 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778/**
1779 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
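/* Editor's note: illustrative sketch, not part of the original source.  The cast chains in the
 * _Sx_ helpers above first sign-extend the fetched value to the intermediate width and then
 * zero-extend the result to 64 bits for the return register.  A standalone example of the idiom:
 * @code
 *  #include <cassert>
 *  #include <cstdint>
 *
 *  int main()
 *  {
 *      uint8_t const bFetched = 0x80; // -128 when interpreted as signed
 *      uint64_t const uSxTo16 = (uint64_t)(uint16_t)(int16_t)(int8_t)bFetched;
 *      uint64_t const uSxTo64 = (uint64_t)(int64_t)(int8_t)bFetched;
 *      assert(uSxTo16 == UINT64_C(0x000000000000ff80)); // sign-extended to 16 bits, then zero-extended
 *      assert(uSxTo64 == UINT64_C(0xffffffffffffff80)); // sign-extended all the way to 64 bits
 *      return 0;
 *  }
 * @endcode */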
1790
1791
1792/**
1793 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1807 * to 32 bits.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1821 * to 64 bits.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1826 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1827#else
1828 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1839 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1840#else
1841 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1848 * to 64 bits.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1853 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1854#else
1855 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1866 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1867#else
1868 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1869#endif
1870}
1871
1872
1873#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1874/**
1875 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment restrictions.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1880 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1881#else
1882 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1883#endif
1884}
1885
1886
1887/**
1888 * Used by TB code to load 128-bit data w/ segmentation, without alignment checks.
1889 */
1890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1891{
1892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1893 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1894#else
1895 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1896#endif
1897}
1898#endif
1899
1900
1901/**
1902 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1907 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1908#else
1909 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1916 */
1917IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1918{
1919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1920 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1921#else
1922 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1923#endif
1924}
1925
1926
1927/**
1928 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1929 */
1930IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1931{
1932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1933 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1934#else
1935 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1936#endif
1937}
1938
1939
1940/**
1941 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1944{
1945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1946 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1947#else
1948 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1949#endif
1950}
1951
1952
1953
1954/**
1955 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1956 */
1957IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1958{
1959#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1960 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1961#else
1962 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1963#endif
1964}
1965
1966
1967/**
1968 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1969 */
1970IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1971{
1972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1973 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1974#else
1975 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1976#endif
1977}
1978
1979
1980/**
1981 * Used by TB code to store a 32-bit selector value onto a generic stack.
1982 *
1983 * Intel CPUs don't write a whole dword here, hence the special function.
1984 */
1985IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1986{
1987#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1988 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1989#else
1990 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1991#endif
1992}
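/* Editor's note: illustrative sketch, not part of the original source.  The SReg variant above
 * exists because, as the comment notes, Intel CPUs store only the 16-bit selector when pushing a
 * segment register with a 32-bit operand size, leaving the upper half of the dword stack slot
 * unmodified.  A standalone model of that partial store (hypothetical buffer and selector value):
 * @code
 *  #include <cassert>
 *  #include <cstdint>
 *  #include <cstring>
 *
 *  int main()
 *  {
 *      uint8_t abStackSlot[4] = { 0xde, 0xad, 0xbe, 0xef }; // pre-existing bytes in the dword slot
 *      uint32_t const u32Value = 0x0000001b;                // selector zero-extended to 32 bits
 *
 *      // Store only the low word; bytes 2..3 of the slot keep their old contents.
 *      uint16_t const u16Sel = (uint16_t)u32Value;
 *      std::memcpy(abStackSlot, &u16Sel, sizeof(u16Sel));
 *
 *      assert(abStackSlot[0] == 0x1b && abStackSlot[1] == 0x00);
 *      assert(abStackSlot[2] == 0xbe && abStackSlot[3] == 0xef);
 *      return 0;
 *  }
 * @endcode */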
1993
1994
1995/**
1996 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1997 */
1998IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1999{
2000#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2001 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2002#else
2003 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2004#endif
2005}
2006
2007
2008/**
2009 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2010 */
2011IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2012{
2013#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2014 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2015#else
2016 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2017#endif
2018}
2019
2020
2021/**
2022 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2025{
2026#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2027 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2028#else
2029 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2030#endif
2031}
2032
2033
2034/**
2035 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2036 */
2037IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2038{
2039#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2040 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2041#else
2042 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2043#endif
2044}
2045
2046
2047
2048/*********************************************************************************************************************************
2049* Helpers: Flat memory fetches and stores. *
2050*********************************************************************************************************************************/
2051
2052/**
2053 * Used by TB code to load unsigned 8-bit data w/ flat address.
2054 * @note Zero extending the value to 64-bit to simplify assembly.
2055 */
2056IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2057{
2058#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2059 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2060#else
2061 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2062#endif
2063}
2064
2065
2066/**
2067 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2068 * to 16 bits.
2069 * @note Zero extending the value to 64-bit to simplify assembly.
2070 */
2071IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2072{
2073#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2074 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2075#else
2076 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2077#endif
2078}
2079
2080
2081/**
2082 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2083 * to 32 bits.
2084 * @note Zero extending the value to 64-bit to simplify assembly.
2085 */
2086IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2087{
2088#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2089 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2090#else
2091 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2092#endif
2093}
2094
2095
2096/**
2097 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2098 * to 64 bits.
2099 */
2100IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2101{
2102#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2103 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2104#else
2105 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2106#endif
2107}
2108
2109
2110/**
2111 * Used by TB code to load unsigned 16-bit data w/ flat address.
2112 * @note Zero extending the value to 64-bit to simplify assembly.
2113 */
2114IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2115{
2116#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2117 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2118#else
2119 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2120#endif
2121}
2122
2123
2124/**
2125 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2126 * to 32 bits.
2127 * @note Zero extending the value to 64-bit to simplify assembly.
2128 */
2129IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2130{
2131#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2132 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2133#else
2134 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2135#endif
2136}
2137
2138
2139/**
2140 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2141 * to 64 bits.
2142 * @note Zero extending the value to 64-bit to simplify assembly.
2143 */
2144IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2145{
2146#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2147 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2148#else
2149 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2150#endif
2151}
2152
2153
2154/**
2155 * Used by TB code to load unsigned 32-bit data w/ flat address.
2156 * @note Zero extending the value to 64-bit to simplify assembly.
2157 */
2158IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2159{
2160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2161 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2162#else
2163 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2164#endif
2165}
2166
2167
2168/**
2169 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2170 * to 64 bits.
2171 * @note Zero extending the value to 64-bit to simplify assembly.
2172 */
2173IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2174{
2175#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2176 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2177#else
2178 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2179#endif
2180}
2181
2182
2183/**
2184 * Used by TB code to load unsigned 64-bit data w/ flat address.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2189 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2190#else
2191 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2192#endif
2193}
2194
2195
2196#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2197/**
2198 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment restrictions.
2199 */
2200IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2201{
2202#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2203 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2204#else
2205 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2206#endif
2207}
2208
2209
2210/**
2211 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checks.
2212 */
2213IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2214{
2215#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2216 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2217#else
2218 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2219#endif
2220}
2221#endif
2222
2223
2224/**
2225 * Used by TB code to store unsigned 8-bit data w/ flat address.
2226 */
2227IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2228{
2229#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2230 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2231#else
2232 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2233#endif
2234}
2235
2236
2237/**
2238 * Used by TB code to store unsigned 16-bit data w/ flat address.
2239 */
2240IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2241{
2242#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2243 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2244#else
2245 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2246#endif
2247}
2248
2249
2250/**
2251 * Used by TB code to store unsigned 32-bit data w/ flat address.
2252 */
2253IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2254{
2255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2256 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2257#else
2258 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2259#endif
2260}
2261
2262
2263/**
2264 * Used by TB code to store unsigned 64-bit data w/ flat address.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2269 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2270#else
2271 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2272#endif
2273}
2274
2275
2276
2277/**
2278 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2279 */
2280IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2281{
2282#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2283 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2284#else
2285 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2286#endif
2287}
2288
2289
2290/**
2291 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2292 */
2293IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2294{
2295#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2296 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2297#else
2298 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2299#endif
2300}
2301
2302
2303/**
2304 * Used by TB code to store a segment selector value onto a flat stack.
2305 *
2306 * Intel CPUs don't write a whole dword here, hence the special function.
2307 */
2308IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2309{
2310#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2311 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2312#else
2313 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2314#endif
2315}
2316
2317
2318/**
2319 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2320 */
2321IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2322{
2323#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2324 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2325#else
2326 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2327#endif
2328}
2329
2330
2331/**
2332 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2333 */
2334IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2335{
2336#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2337 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2338#else
2339 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2340#endif
2341}
2342
2343
2344/**
2345 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2346 */
2347IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2348{
2349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2350 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2351#else
2352 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2353#endif
2354}
2355
2356
2357/**
2358 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2359 */
2360IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2361{
2362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2363 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2364#else
2365 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2366#endif
2367}
2368
2369
2370
2371/*********************************************************************************************************************************
2372* Helpers: Segmented memory mapping. *
2373*********************************************************************************************************************************/
2374
2375/**
2376 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2377 * segmentation.
2378 */
2379IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2380 RTGCPTR GCPtrMem, uint8_t iSegReg))
2381{
2382#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2383 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2384#else
2385 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2386#endif
2387}
2388
2389
2390/**
2391 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2392 */
2393IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2394 RTGCPTR GCPtrMem, uint8_t iSegReg))
2395{
2396#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2397 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2398#else
2399 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2400#endif
2401}
2402
2403
2404/**
2405 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2406 */
2407IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2408 RTGCPTR GCPtrMem, uint8_t iSegReg))
2409{
2410#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2411 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2412#else
2413 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2414#endif
2415}
2416
2417
2418/**
2419 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2420 */
2421IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2422 RTGCPTR GCPtrMem, uint8_t iSegReg))
2423{
2424#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2425 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2426#else
2427 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2428#endif
2429}
2430
2431
2432/**
2433 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2434 * segmentation.
2435 */
2436IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2437 RTGCPTR GCPtrMem, uint8_t iSegReg))
2438{
2439#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2440 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2441#else
2442 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2443#endif
2444}
2445
2446
2447/**
2448 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2449 */
2450IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2451 RTGCPTR GCPtrMem, uint8_t iSegReg))
2452{
2453#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2454 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2455#else
2456 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2457#endif
2458}
2459
2460
2461/**
2462 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2463 */
2464IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2465 RTGCPTR GCPtrMem, uint8_t iSegReg))
2466{
2467#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2468 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2469#else
2470 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2471#endif
2472}
2473
2474
2475/**
2476 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2477 */
2478IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2479 RTGCPTR GCPtrMem, uint8_t iSegReg))
2480{
2481#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2482 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2483#else
2484 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2485#endif
2486}
2487
2488
2489/**
2490 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2491 * segmentation.
2492 */
2493IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2494 RTGCPTR GCPtrMem, uint8_t iSegReg))
2495{
2496#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2497 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2498#else
2499 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2500#endif
2501}
2502
2503
2504/**
2505 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2506 */
2507IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2508 RTGCPTR GCPtrMem, uint8_t iSegReg))
2509{
2510#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2511 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2512#else
2513 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2514#endif
2515}
2516
2517
2518/**
2519 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2520 */
2521IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2522 RTGCPTR GCPtrMem, uint8_t iSegReg))
2523{
2524#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2525 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2526#else
2527 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2528#endif
2529}
2530
2531
2532/**
2533 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2534 */
2535IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2536 RTGCPTR GCPtrMem, uint8_t iSegReg))
2537{
2538#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2539 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2540#else
2541 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2542#endif
2543}
2544
2545
2546/**
2547 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2548 * segmentation.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2551 RTGCPTR GCPtrMem, uint8_t iSegReg))
2552{
2553#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2554 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2555#else
2556 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2557#endif
2558}
2559
2560
2561/**
2562 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2563 */
2564IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2565 RTGCPTR GCPtrMem, uint8_t iSegReg))
2566{
2567#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2568 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2569#else
2570 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2571#endif
2572}
2573
2574
2575/**
2576 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2577 */
2578IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2579 RTGCPTR GCPtrMem, uint8_t iSegReg))
2580{
2581#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2582 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2583#else
2584 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2585#endif
2586}
2587
2588
2589/**
2590 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2591 */
2592IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2593 RTGCPTR GCPtrMem, uint8_t iSegReg))
2594{
2595#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2596 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2597#else
2598 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2599#endif
2600}
2601
2602
2603/**
2604 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2605 */
2606IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2607 RTGCPTR GCPtrMem, uint8_t iSegReg))
2608{
2609#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2610 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2611#else
2612 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2613#endif
2614}
2615
2616
2617/**
2618 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2619 */
2620IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2621 RTGCPTR GCPtrMem, uint8_t iSegReg))
2622{
2623#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2624 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2625#else
2626 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2627#endif
2628}
2629
2630
2631/**
2632 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2633 * segmentation.
2634 */
2635IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2636 RTGCPTR GCPtrMem, uint8_t iSegReg))
2637{
2638#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2639 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2640#else
2641 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2642#endif
2643}
2644
2645
2646/**
2647 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2648 */
2649IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2650 RTGCPTR GCPtrMem, uint8_t iSegReg))
2651{
2652#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2653 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2654#else
2655 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2656#endif
2657}
2658
2659
2660/**
2661 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2662 */
2663IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2664 RTGCPTR GCPtrMem, uint8_t iSegReg))
2665{
2666#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2667 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2668#else
2669 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2670#endif
2671}
2672
2673
2674/**
2675 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2676 */
2677IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2678 RTGCPTR GCPtrMem, uint8_t iSegReg))
2679{
2680#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2681 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2682#else
2683 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2684#endif
2685}
2686
2687
2688/*********************************************************************************************************************************
2689* Helpers: Flat memory mapping. *
2690*********************************************************************************************************************************/
2691
2692/**
2693 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2694 * address.
2695 */
2696IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2697{
2698#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2699 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2700#else
2701 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2702#endif
2703}
2704
2705
2706/**
2707 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2708 */
2709IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2710{
2711#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2712 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2713#else
2714 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2715#endif
2716}
2717
2718
2719/**
2720 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2721 */
2722IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2723{
2724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2725 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2726#else
2727 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2728#endif
2729}
2730
2731
2732/**
2733 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2734 */
2735IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2736{
2737#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2738 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2739#else
2740 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2741#endif
2742}
2743
2744
2745/**
2746 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2747 * address.
2748 */
2749IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2750{
2751#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2752 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2753#else
2754 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2755#endif
2756}
2757
2758
2759/**
2760 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2761 */
2762IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2763{
2764#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2765 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2766#else
2767 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2768#endif
2769}
2770
2771
2772/**
2773 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2774 */
2775IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2776{
2777#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2778 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2779#else
2780 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2781#endif
2782}
2783
2784
2785/**
2786 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2787 */
2788IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2789{
2790#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2791 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2792#else
2793 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2794#endif
2795}
2796
2797
2798/**
2799 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2800 * address.
2801 */
2802IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2803{
2804#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2805 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2806#else
2807 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2808#endif
2809}
2810
2811
2812/**
2813 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2814 */
2815IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2816{
2817#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2818 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2819#else
2820 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2821#endif
2822}
2823
2824
2825/**
2826 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2827 */
2828IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2829{
2830#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2831 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2832#else
2833 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2834#endif
2835}
2836
2837
2838/**
2839 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2840 */
2841IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2842{
2843#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2844 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2845#else
2846 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2847#endif
2848}
2849
2850
2851/**
2852 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2853 * address.
2854 */
2855IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2856{
2857#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2858 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2859#else
2860 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2861#endif
2862}
2863
2864
2865/**
2866 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2867 */
2868IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2869{
2870#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2871 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2872#else
2873 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2874#endif
2875}
2876
2877
2878/**
2879 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2880 */
2881IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2882{
2883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2884 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2885#else
2886 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2887#endif
2888}
2889
2890
2891/**
2892 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2893 */
2894IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2895{
2896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2897 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2898#else
2899 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2900#endif
2901}
2902
2903
2904/**
2905 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2906 */
2907IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2908{
2909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2910 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2911#else
2912 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2913#endif
2914}
2915
2916
2917/**
2918 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2919 */
2920IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2921{
2922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2923 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2924#else
2925 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2926#endif
2927}
2928
2929
2930/**
2931 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2932 * address.
2933 */
2934IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2935{
2936#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2937 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2938#else
2939 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2940#endif
2941}
2942
2943
2944/**
2945 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2946 */
2947IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2948{
2949#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2950 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2951#else
2952 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2953#endif
2954}
2955
2956
2957/**
2958 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2959 */
2960IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2961{
2962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2963 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2964#else
2965 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2966#endif
2967}
2968
2969
2970/**
2971 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2972 */
2973IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2974{
2975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2976 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2977#else
2978 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2979#endif
2980}
2981
2982
2983/*********************************************************************************************************************************
2984* Helpers: Commit, rollback & unmap *
2985*********************************************************************************************************************************/
2986
2987/**
2988 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2989 */
2990IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2991{
2992 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2993}
2994
2995
2996/**
2997 * Used by TB code to commit and unmap a read-write memory mapping.
2998 */
2999IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3000{
3001 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3002}
3003
3004
3005/**
3006 * Used by TB code to commit and unmap a write-only memory mapping.
3007 */
3008IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3009{
3010 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3011}
3012
3013
3014/**
3015 * Used by TB code to commit and unmap a read-only memory mapping.
3016 */
3017IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3018{
3019 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3020}
3021
3022
3023/**
3024 * Reinitializes the native recompiler state.
3025 *
3026 * Called before starting a new recompile job.
3027 */
3028static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3029{
3030 pReNative->cLabels = 0;
3031 pReNative->bmLabelTypes = 0;
3032 pReNative->cFixups = 0;
3033#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3034 pReNative->pDbgInfo->cEntries = 0;
3035#endif
3036 pReNative->pTbOrg = pTb;
3037 pReNative->cCondDepth = 0;
3038 pReNative->uCondSeqNo = 0;
3039 pReNative->uCheckIrqSeqNo = 0;
3040 pReNative->uTlbSeqNo = 0;
3041
3042#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3043 pReNative->Core.offPc = 0;
3044 pReNative->Core.cInstrPcUpdateSkipped = 0;
3045#endif
3046#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3047 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3048#endif
3049 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3050#if IEMNATIVE_HST_GREG_COUNT < 32
3051 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3052#endif
3053 ;
3054 pReNative->Core.bmHstRegsWithGstShadow = 0;
3055 pReNative->Core.bmGstRegShadows = 0;
3056 pReNative->Core.bmVars = 0;
3057 pReNative->Core.bmStack = 0;
3058 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3059 pReNative->Core.u64ArgVars = UINT64_MAX;
3060
3061 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 16);
3062 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3063 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3064 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3065 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3066 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3067 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3068 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3069 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3070 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3071 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3072 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3073 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3074 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3075 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3076 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3077 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3078
3079 /* Full host register reinit: */
3080 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3081 {
3082 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3083 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3084 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3085 }
3086
3087 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3088 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3089#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3090 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3091#endif
3092#ifdef IEMNATIVE_REG_FIXED_TMP0
3093 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3094#endif
3095#ifdef IEMNATIVE_REG_FIXED_TMP1
3096 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3097#endif
3098#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3099 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3100#endif
3101 );
3102 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3103 {
3104 fRegs &= ~RT_BIT_32(idxReg);
3105        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3106 }
3107
3108 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3109#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3110 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3111#endif
3112#ifdef IEMNATIVE_REG_FIXED_TMP0
3113 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3114#endif
3115#ifdef IEMNATIVE_REG_FIXED_TMP1
3116 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3117#endif
3118#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3119 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3120#endif
3121
3122#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3123# ifdef RT_ARCH_ARM64
3124 /*
3125 * ARM64 only has 32 128-bit registers. In order to support emulating 256-bit registers we statically
3126 * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3127 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register as fixed here during init
3128 * and the register allocator assumes that it will always be free when the lower one is picked.
3129 */
3130 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3131# else
3132 uint32_t const fFixedAdditional = 0;
3133# endif
3134
3135 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3136 | fFixedAdditional
3137# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3138 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3139# endif
3140 ;
3141 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3142 pReNative->Core.bmGstSimdRegShadows = 0;
3143 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3144 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3145
3146 /* Full host register reinit: */
3147 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3148 {
3149 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3150 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3151 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3152 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3153 }
3154
3155 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3156 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3157 {
3158 fRegs &= ~RT_BIT_32(idxReg);
3159 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3160 }
3161
3162#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3163 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3164#endif
3165
3166#endif
3167
3168 return pReNative;
3169}
3170
3171
3172/**
3173 * Allocates and initializes the native recompiler state.
3174 *
3175 * This is called the first time an EMT wants to recompile something.
3176 *
3177 * @returns Pointer to the new recompiler state.
3178 * @param pVCpu The cross context virtual CPU structure of the calling
3179 * thread.
3180 * @param pTb The TB that's about to be recompiled.
3181 * @thread EMT(pVCpu)
3182 */
3183static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3184{
3185 VMCPU_ASSERT_EMT(pVCpu);
3186
3187 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3188 AssertReturn(pReNative, NULL);
3189
3190 /*
3191 * Try allocate all the buffers and stuff we need.
3192 */
3193 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3194 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3195 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3196#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3197 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3198#endif
3199 if (RT_LIKELY( pReNative->pInstrBuf
3200 && pReNative->paLabels
3201 && pReNative->paFixups)
3202#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3203 && pReNative->pDbgInfo
3204#endif
3205 )
3206 {
3207 /*
3208 * Set the buffer & array sizes on success.
3209 */
3210 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3211 pReNative->cLabelsAlloc = _8K;
3212 pReNative->cFixupsAlloc = _16K;
3213#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3214 pReNative->cDbgInfoAlloc = _16K;
3215#endif
3216
3217 /* Other constant stuff: */
3218 pReNative->pVCpu = pVCpu;
3219
3220 /*
3221 * Done, just need to save it and reinit it.
3222 */
3223 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3224 return iemNativeReInit(pReNative, pTb);
3225 }
3226
3227 /*
3228 * Failed. Cleanup and return.
3229 */
3230 AssertFailed();
3231 RTMemFree(pReNative->pInstrBuf);
3232 RTMemFree(pReNative->paLabels);
3233 RTMemFree(pReNative->paFixups);
3234#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3235 RTMemFree(pReNative->pDbgInfo);
3236#endif
3237 RTMemFree(pReNative);
3238 return NULL;
3239}
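
/*
 * Editor's note: a minimal sketch (not part of the original source) of how a caller is
 * expected to combine iemNativeReInit and iemNativeInit - reuse the per-EMT state when it
 * already exists and only allocate it on first use.  The error handling shown is an
 * assumption; the surrounding recompile loop is omitted.
 *
 *     PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *     if (RT_LIKELY(pReNative))
 *         pReNative = iemNativeReInit(pReNative, pTb);
 *     else
 *     {
 *         pReNative = iemNativeInit(pVCpu, pTb);
 *         AssertReturn(pReNative, pTb);       // assumed policy: fall back to the threaded TB on failure
 *     }
 */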
3240
3241
3242/**
3243 * Creates a label.
3244 *
3245 * If the label does not yet have a defined position,
3246 * call iemNativeLabelDefine() later to set it.
3247 *
3248 * @returns Label ID. Throws VBox status code on failure, so no need to check
3249 * the return value.
3250 * @param pReNative The native recompile state.
3251 * @param enmType The label type.
3252 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3253 * label is not yet defined (default).
3254 * @param uData Data associated with the label. Only applicable to
3255 * certain types of labels. Default is zero.
3256 */
3257DECL_HIDDEN_THROW(uint32_t)
3258iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3259 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3260{
3261 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3262
3263 /*
3264 * Locate existing label definition.
3265 *
3266 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3267 * and uData is zero.
3268 */
3269 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3270 uint32_t const cLabels = pReNative->cLabels;
3271 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3272#ifndef VBOX_STRICT
3273 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3274 && offWhere == UINT32_MAX
3275 && uData == 0
3276#endif
3277 )
3278 {
3279#ifndef VBOX_STRICT
3280 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3281 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3282 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3283 if (idxLabel < pReNative->cLabels)
3284 return idxLabel;
3285#else
3286 for (uint32_t i = 0; i < cLabels; i++)
3287 if ( paLabels[i].enmType == enmType
3288 && paLabels[i].uData == uData)
3289 {
3290 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3291 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3292 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3293 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3294 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3295 return i;
3296 }
3297 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3298 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3299#endif
3300 }
3301
3302 /*
3303 * Make sure we've got room for another label.
3304 */
3305 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3306 { /* likely */ }
3307 else
3308 {
3309 uint32_t cNew = pReNative->cLabelsAlloc;
3310 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3311 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3312 cNew *= 2;
3313 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3314 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3315 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3316 pReNative->paLabels = paLabels;
3317 pReNative->cLabelsAlloc = cNew;
3318 }
3319
3320 /*
3321 * Define a new label.
3322 */
3323 paLabels[cLabels].off = offWhere;
3324 paLabels[cLabels].enmType = enmType;
3325 paLabels[cLabels].uData = uData;
3326 pReNative->cLabels = cLabels + 1;
3327
3328 Assert((unsigned)enmType < 64);
3329 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3330
3331 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3332 {
3333 Assert(uData == 0);
3334 pReNative->aidxUniqueLabels[enmType] = cLabels;
3335 }
3336
3337 if (offWhere != UINT32_MAX)
3338 {
3339#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3340 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3341 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3342#endif
3343 }
3344 return cLabels;
3345}
3346
3347
3348/**
3349 * Defines the location of an existing label.
3350 *
3351 * @param pReNative The native recompile state.
3352 * @param idxLabel The label to define.
3353 * @param offWhere The position.
3354 */
3355DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3356{
3357 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3358 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3359 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3360 pLabel->off = offWhere;
3361#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3362 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3363 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3364#endif
3365}
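
/*
 * Editor's note: a small usage sketch (not from the original source) showing the intended
 * pairing of iemNativeLabelCreate and iemNativeLabelDefine for a forward branch target.
 * The label type and the conditional-jump emitter named below are illustrative assumptions;
 * 'off' is the current native instruction buffer offset.
 *
 *     uint32_t const idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else); // forward declared (off=UINT32_MAX)
 *     off = iemNativeEmitJccToLabel(pReNative, off, idxLabelElse, kIemNativeInstrCond_ne);     // hypothetical emitter + fixup
 *     // ... emit the 'if' body ...
 *     iemNativeLabelDefine(pReNative, idxLabelElse, off);                                      // resolve the label here
 */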
3366
3367
3368/**
3369 * Looks up a label.
3370 *
3371 * @returns Label ID if found, UINT32_MAX if not.
3372 */
3373static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3374 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3375{
3376 Assert((unsigned)enmType < 64);
3377 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3378 {
3379 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3380 return pReNative->aidxUniqueLabels[enmType];
3381
3382 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3383 uint32_t const cLabels = pReNative->cLabels;
3384 for (uint32_t i = 0; i < cLabels; i++)
3385 if ( paLabels[i].enmType == enmType
3386 && paLabels[i].uData == uData
3387 && ( paLabels[i].off == offWhere
3388 || offWhere == UINT32_MAX
3389 || paLabels[i].off == UINT32_MAX))
3390 return i;
3391 }
3392 return UINT32_MAX;
3393}
3394
3395
3396/**
3397 * Adds a fixup.
3398 *
3399 * @throws VBox status code (int) on failure.
3400 * @param pReNative The native recompile state.
3401 * @param offWhere The instruction offset of the fixup location.
3402 * @param idxLabel The target label ID for the fixup.
3403 * @param enmType The fixup type.
3404 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3405 */
3406DECL_HIDDEN_THROW(void)
3407iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3408 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3409{
3410 Assert(idxLabel <= UINT16_MAX);
3411 Assert((unsigned)enmType <= UINT8_MAX);
3412#ifdef RT_ARCH_ARM64
3413 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3414 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3415 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3416#endif
3417
3418 /*
3419 * Make sure we've got room.
3420 */
3421 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3422 uint32_t const cFixups = pReNative->cFixups;
3423 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3424 { /* likely */ }
3425 else
3426 {
3427 uint32_t cNew = pReNative->cFixupsAlloc;
3428 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3429 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3430 cNew *= 2;
3431 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3432 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3433 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3434 pReNative->paFixups = paFixups;
3435 pReNative->cFixupsAlloc = cNew;
3436 }
3437
3438 /*
3439 * Add the fixup.
3440 */
3441 paFixups[cFixups].off = offWhere;
3442 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3443 paFixups[cFixups].enmType = enmType;
3444 paFixups[cFixups].offAddend = offAddend;
3445 pReNative->cFixups = cFixups + 1;
3446}
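
/*
 * Editor's note: an illustrative sketch (not from the original source) of how a branch
 * emitter records a fixup so the displacement can be patched once the target label gets
 * defined.  The AMD64 encoding, the fixup type and the -4 addend convention below are
 * assumptions.
 *
 *     uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);      // byte granular buffer on AMD64
 *     pbCodeBuf[off++] = 0xe9;                                                     // jmp rel32 opcode
 *     iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);  // assumed type/addend
 *     pbCodeBuf[off++] = 0; pbCodeBuf[off++] = 0; pbCodeBuf[off++] = 0; pbCodeBuf[off++] = 0; // rel32 placeholder
 */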
3447
3448
3449/**
3450 * Slow code path for iemNativeInstrBufEnsure.
3451 */
3452DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3453{
3454 /* Double the buffer size till we meet the request. */
3455 uint32_t cNew = pReNative->cInstrBufAlloc;
3456 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3457 do
3458 cNew *= 2;
3459 while (cNew < off + cInstrReq);
3460
3461 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3462#ifdef RT_ARCH_ARM64
3463 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3464#else
3465 uint32_t const cbMaxInstrBuf = _2M;
3466#endif
3467 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3468
3469 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3470 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3471
3472#ifdef VBOX_STRICT
3473 pReNative->offInstrBufChecked = off + cInstrReq;
3474#endif
3475 pReNative->cInstrBufAlloc = cNew;
3476 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3477}
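
/*
 * Editor's note: the fast-path wrapper lives in the header; the sketch below is an
 * approximation (not copied from the actual header) of the intended split - a cheap
 * inline capacity check that only calls the slow path above when the requested room
 * is not already available.
 *
 *     DECL_INLINE_THROW(PIEMNATIVEINSTR)
 *     iemNativeInstrBufEnsure(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
 *     {
 *         if (RT_LIKELY(off + (uint64_t)cInstrReq <= pReNative->cInstrBufAlloc))
 *             return pReNative->pInstrBuf;
 *         return iemNativeInstrBufEnsureSlow(pReNative, off, cInstrReq);
 *     }
 */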
3478
3479#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3480
3481/**
3482 * Grows the static debug info array used during recompilation.
3483 *
3484 * @returns Pointer to the new debug info block; throws VBox status code on
3485 * failure, so no need to check the return value.
3486 */
3487DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3488{
3489 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3490 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3491 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3492 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3493 pReNative->pDbgInfo = pDbgInfo;
3494 pReNative->cDbgInfoAlloc = cNew;
3495 return pDbgInfo;
3496}
3497
3498
3499/**
3500 * Adds a new, uninitialized debug info entry, returning the pointer to it.
3501 */
3502DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3503{
3504 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3505 { /* likely */ }
3506 else
3507 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3508 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3509}
3510
3511
3512/**
3513 * Debug Info: Adds a native offset record, if necessary.
3514 */
3515DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3516{
3517 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3518
3519 /*
3520 * Search backwards to see if we've got a similar record already.
3521 */
3522 uint32_t idx = pDbgInfo->cEntries;
3523 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3524 while (idx-- > idxStop)
3525 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3526 {
3527 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3528 return;
3529 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3530 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3531 break;
3532 }
3533
3534 /*
3535 * Add it.
3536 */
3537 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3538 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3539 pEntry->NativeOffset.offNative = off;
3540}
3541
3542
3543/**
3544 * Debug Info: Record info about a label.
3545 */
3546static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3547{
3548 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3549 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3550 pEntry->Label.uUnused = 0;
3551 pEntry->Label.enmLabel = (uint8_t)enmType;
3552 pEntry->Label.uData = uData;
3553}
3554
3555
3556/**
3557 * Debug Info: Record info about a threaded call.
3558 */
3559static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3560{
3561 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3562 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3563 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3564 pEntry->ThreadedCall.uUnused = 0;
3565 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3566}
3567
3568
3569/**
3570 * Debug Info: Record info about a new guest instruction.
3571 */
3572static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3573{
3574 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3575 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3576 pEntry->GuestInstruction.uUnused = 0;
3577 pEntry->GuestInstruction.fExec = fExec;
3578}
3579
3580
3581/**
3582 * Debug Info: Record info about guest register shadowing.
3583 */
3584DECL_HIDDEN_THROW(void)
3585iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3586 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3587{
3588 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3589 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3590 pEntry->GuestRegShadowing.uUnused = 0;
3591 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3592 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3593 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3594}
3595
3596
3597# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3598/**
3599 * Debug Info: Record info about guest SIMD register shadowing.
3600 */
3601DECL_HIDDEN_THROW(void)
3602iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3603 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3604{
3605 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3606 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3607 pEntry->GuestSimdRegShadowing.uUnused = 0;
3608 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3609 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3610 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3611}
3612# endif
3613
3614
3615# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3616/**
3617 * Debug Info: Record info about delayed RIP updates.
3618 */
3619DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3620{
3621 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3622 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3623 pEntry->DelayedPcUpdate.offPc = offPc;
3624 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3625}
3626# endif
3627
3628#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3629
3630
3631/*********************************************************************************************************************************
3632* Register Allocator *
3633*********************************************************************************************************************************/
3634
3635/**
3636 * Register parameter indexes (indexed by argument number).
3637 */
3638DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3639{
3640 IEMNATIVE_CALL_ARG0_GREG,
3641 IEMNATIVE_CALL_ARG1_GREG,
3642 IEMNATIVE_CALL_ARG2_GREG,
3643 IEMNATIVE_CALL_ARG3_GREG,
3644#if defined(IEMNATIVE_CALL_ARG4_GREG)
3645 IEMNATIVE_CALL_ARG4_GREG,
3646# if defined(IEMNATIVE_CALL_ARG5_GREG)
3647 IEMNATIVE_CALL_ARG5_GREG,
3648# if defined(IEMNATIVE_CALL_ARG6_GREG)
3649 IEMNATIVE_CALL_ARG6_GREG,
3650# if defined(IEMNATIVE_CALL_ARG7_GREG)
3651 IEMNATIVE_CALL_ARG7_GREG,
3652# endif
3653# endif
3654# endif
3655#endif
3656};
3657AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3658
3659/**
3660 * Call register masks indexed by argument count.
3661 */
3662DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3663{
3664 0,
3665 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3666 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3667 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3668 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3669 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3670#if defined(IEMNATIVE_CALL_ARG4_GREG)
3671 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3672 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3673# if defined(IEMNATIVE_CALL_ARG5_GREG)
3674 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3675 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3676# if defined(IEMNATIVE_CALL_ARG6_GREG)
3677 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3678 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3679 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3680# if defined(IEMNATIVE_CALL_ARG7_GREG)
3681 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3682 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3683 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3684# endif
3685# endif
3686# endif
3687#endif
3688};
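
/*
 * Editor's note: a brief usage sketch (not from the original source).  The first table
 * maps an argument number to the host register it must be loaded into, the second gives
 * the combined register mask for the first N arguments, e.g. for flushing or reserving
 * them before emitting a helper call; 'cArgs' is a hypothetical argument count.
 *
 *     uint8_t  const idxRegArg1 = g_aidxIemNativeCallRegs[1];     // host register for argument #1
 *     uint32_t const fArgRegs   = g_afIemNativeCallRegs[cArgs];   // mask covering arguments 0..cArgs-1
 *     Assert(cArgs < 2 || (fArgRegs & RT_BIT_32(idxRegArg1)));
 */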
3689
3690#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3691/**
3692 * BP offset of the stack argument slots.
3693 *
3694 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3695 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3696 */
3697DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3698{
3699 IEMNATIVE_FP_OFF_STACK_ARG0,
3700# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3701 IEMNATIVE_FP_OFF_STACK_ARG1,
3702# endif
3703# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3704 IEMNATIVE_FP_OFF_STACK_ARG2,
3705# endif
3706# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3707 IEMNATIVE_FP_OFF_STACK_ARG3,
3708# endif
3709};
3710AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3711#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3712
3713/**
3714 * Info about shadowed guest register values.
3715 * @see IEMNATIVEGSTREG
3716 */
3717DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3718{
3719#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3720 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3721 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3722 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3723 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3724 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3725 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3726 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3727 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3728 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3729 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3730 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3731 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3732 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3733 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3734 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3735 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3736 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3737 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3738 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3739 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3740 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3741 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3742 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3743 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3744 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3745 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3746 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3747 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3748 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3749 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3750 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3751 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3752 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3753 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3754 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3755 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3756 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3757 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3758 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3759 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3760 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3761 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3762 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3763 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3764 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3765 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3766 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3767 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3768#undef CPUMCTX_OFF_AND_SIZE
3769};
3770AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
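
/*
 * Editor's note: a tiny sketch (not from the original source) of how this table is
 * consumed - given a guest register enum value it yields the byte offset into VMCPU
 * (cpum.GstCtx.*), the field size and a name used for logging.  The 'off' member name
 * is an assumption; 'cb' and 'pszName' appear in the code elsewhere in this file.
 *
 *     Log12(("loading guest %s (%u bytes at VMCPU offset %#x)\n",
 *            g_aGstShadowInfo[enmGstReg].pszName,
 *            g_aGstShadowInfo[enmGstReg].cb,
 *            g_aGstShadowInfo[enmGstReg].off));    // assumed member name for the offset
 */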
3771
3772
3773/** Host CPU general purpose register names. */
3774DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3775{
3776#ifdef RT_ARCH_AMD64
3777 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3778#elif RT_ARCH_ARM64
3779 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3780 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3781#else
3782# error "port me"
3783#endif
3784};
3785
3786
3787#if 0 /* unused */
3788/**
3789 * Tries to locate a suitable register in the given register mask.
3790 *
3791 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3792 * failed.
3793 *
3794 * @returns Host register number on success, returns UINT8_MAX on failure.
3795 */
3796static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3797{
3798 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3799 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3800 if (fRegs)
3801 {
3802 /** @todo pick better here: */
3803 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3804
3805 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3806 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3807 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3808 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3809
3810 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3811 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3812 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3813 return idxReg;
3814 }
3815 return UINT8_MAX;
3816}
3817#endif /* unused */
3818
3819
3820/**
3821 * Locate a register, possibly freeing one up.
3822 *
3823 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3824 * failed.
3825 *
3826 * @returns Host register number on success. Returns UINT8_MAX if no registers
3827 * are found; the caller is supposed to deal with this and raise an
3828 * allocation-type specific status code (if desired).
3829 *
3830 * @throws VBox status code if we run into trouble spilling a variable or
3831 * recording debug info. Does NOT throw anything if we're out of
3832 * registers, though.
3833 */
3834static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3835 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3836{
3837 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3838 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3839 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3840
3841 /*
3842 * Try a freed register that's shadowing a guest register.
3843 */
3844 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3845 if (fRegs)
3846 {
3847 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3848
3849#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3850 /*
3851 * When we have liveness information, we use it to kick out all shadowed
3852 * guest registers that will not be needed any more in this TB. If we're
3853 * lucky, this may prevent us from ending up here again.
3854 *
3855 * Note! We must consider the previous entry here so we don't free
3856 * anything that the current threaded function requires (current
3857 * entry is produced by the next threaded function).
3858 */
3859 uint32_t const idxCurCall = pReNative->idxCurCall;
3860 if (idxCurCall > 0)
3861 {
3862 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3863
3864# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3865 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3866 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3867 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3868# else
3869 /* Construct a mask of the registers not in the read or write state.
3870 Note! We could skip writes, if they aren't from us, as this is just
3871 a hack to prevent trashing registers that have just been written
3872 or will be written when we retire the current instruction. */
3873 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3874 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3875 & IEMLIVENESSBIT_MASK;
3876# endif
3877 /* Merge EFLAGS. */
3878 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3879 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3880 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3881 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3882 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3883
3884 /* If it matches any shadowed registers. */
3885 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3886 {
3887 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3888 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3889 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3890
3891 /* See if we've got any unshadowed registers we can return now. */
3892 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3893 if (fUnshadowedRegs)
3894 {
3895 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3896 return (fPreferVolatile
3897 ? ASMBitFirstSetU32(fUnshadowedRegs)
3898 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3899 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3900 - 1;
3901 }
3902 }
3903 }
3904#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3905
3906 unsigned const idxReg = (fPreferVolatile
3907 ? ASMBitFirstSetU32(fRegs)
3908 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3909 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3910 - 1;
3911
3912 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3913 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3914 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3915 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3916
3917 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3918 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3919 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3920 return idxReg;
3921 }
3922
3923 /*
3924 * Try free up a variable that's in a register.
3925 *
3926 * We do two rounds here: first we evacuate variables that don't need to be
3927 * saved on the stack, then in the second round we move things to the stack.
3928 */
3929 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3930 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3931 {
3932 uint32_t fVars = pReNative->Core.bmVars;
3933 while (fVars)
3934 {
3935 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3936 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3937 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3938 && (RT_BIT_32(idxReg) & fRegMask)
3939 && ( iLoop == 0
3940 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3941 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3942 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3943 {
3944 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3945 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3946 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3947 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3948 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3949 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3950
3951 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3952 {
3953 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3954 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3955 }
3956
3957 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3958 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3959
3960 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3961 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3962 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3963 return idxReg;
3964 }
3965 fVars &= ~RT_BIT_32(idxVar);
3966 }
3967 }
3968
3969 return UINT8_MAX;
3970}
3971
3972
3973/**
3974 * Reassigns a variable to a different register specified by the caller.
3975 *
3976 * @returns The new code buffer position.
3977 * @param pReNative The native recompile state.
3978 * @param off The current code buffer position.
3979 * @param idxVar The variable index.
3980 * @param idxRegOld The old host register number.
3981 * @param idxRegNew The new host register number.
3982 * @param pszCaller The caller for logging.
3983 */
3984static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3985 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3986{
3987 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3988 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3989#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3990 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3991#endif
3992 RT_NOREF(pszCaller);
3993
3994 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3995
3996 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3997 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3998 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3999 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4000
4001 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4002 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4003 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4004 if (fGstRegShadows)
4005 {
4006 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4007 | RT_BIT_32(idxRegNew);
4008 while (fGstRegShadows)
4009 {
4010 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4011 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4012
4013 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4014 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4015 }
4016 }
4017
4018 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4019 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4020 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4021 return off;
4022}
4023
4024
4025/**
4026 * Moves a variable to a different register or spills it onto the stack.
4027 *
4028 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4029 * kinds can easily be recreated if needed later.
4030 *
4031 * @returns The new code buffer position.
4032 * @param pReNative The native recompile state.
4033 * @param off The current code buffer position.
4034 * @param idxVar The variable index.
4035 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4036 * call-volatile registers.
4037 */
4038DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4039 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4040{
4041 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4042 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4043 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4044 Assert(!pVar->fRegAcquired);
4045
4046 uint8_t const idxRegOld = pVar->idxReg;
4047 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4048 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4049 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4050 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4051 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4052 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4053 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4054 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4055
4056
4057 /** @todo Add statistics on this.*/
4058 /** @todo Implement basic variable liveness analysis (python) so variables
4059 * can be freed immediately once no longer used. Without that we risk
4060 * trashing registers and stack slots on dead variables.
4061 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4062
4063 /*
4064 * First try move it to a different register, as that's cheaper.
4065 */
4066 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4067 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4068 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4069 if (fRegs)
4070 {
4071 /* Avoid using shadow registers, if possible. */
4072 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4073 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4074 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4075 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4076 }
4077
4078 /*
4079 * Otherwise we must spill the register onto the stack.
4080 */
4081 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4082 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4083 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4084 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4085
4086 pVar->idxReg = UINT8_MAX;
4087 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4088 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4089 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4090 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4091 return off;
4092}
4093
4094
4095/**
4096 * Allocates a temporary host general purpose register.
4097 *
4098 * This may emit code to save register content onto the stack in order to free
4099 * up a register.
4100 *
4101 * @returns The host register number; throws VBox status code on failure,
4102 * so no need to check the return value.
4103 * @param pReNative The native recompile state.
4104 * @param poff Pointer to the variable with the code buffer position.
4105 * This will be update if we need to move a variable from
4106 * register to stack in order to satisfy the request.
4107 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4108 * registers (@c true, default) or the other way around
4109 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4110 */
4111DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4112{
4113 /*
4114 * Try find a completely unused register, preferably a call-volatile one.
4115 */
4116 uint8_t idxReg;
4117 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4118 & ~pReNative->Core.bmHstRegsWithGstShadow
4119 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4120 if (fRegs)
4121 {
4122 if (fPreferVolatile)
4123 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4124 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4125 else
4126 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4127 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4128 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4129 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4130 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4131 }
4132 else
4133 {
4134 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4135 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4136 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4137 }
4138 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4139}
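
/*
 * Editor's note: a short usage sketch (not from the original source).  A typical emitter
 * grabs a scratch register, uses it for a few emitted instructions and then releases it;
 * the release helper named below is an assumption based on the naming conventions used in
 * this file, and 'idxRegSrc' stands for some register the caller already holds.
 *
 *     uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *     off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp, idxRegSrc);
 *     // ... emit more code using idxRegTmp ...
 *     iemNativeRegFreeTmp(pReNative, idxRegTmp);   // assumed release helper
 */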
4140
4141
4142/**
4143 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4144 * registers.
4145 *
4146 * @returns The host register number; throws VBox status code on failure,
4147 * so no need to check the return value.
4148 * @param pReNative The native recompile state.
4149 * @param poff Pointer to the variable with the code buffer position.
4150 * This will be updated if we need to move a variable from
4151 * register to stack in order to satisfy the request.
4152 * @param fRegMask Mask of acceptable registers.
4153 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4154 * registers (@c true, default) or the other way around
4155 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4156 */
4157DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4158 bool fPreferVolatile /*= true*/)
4159{
4160 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4161 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4162
4163 /*
4164 * Try find a completely unused register, preferably a call-volatile one.
4165 */
4166 uint8_t idxReg;
4167 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4168 & ~pReNative->Core.bmHstRegsWithGstShadow
4169 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4170 & fRegMask;
4171 if (fRegs)
4172 {
4173 if (fPreferVolatile)
4174 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4175 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4176 else
4177 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4178 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4179 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4180 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4181 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4182 }
4183 else
4184 {
4185 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4186 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4187 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4188 }
4189 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4190}
4191
4192
4193/**
4194 * Allocates a temporary register for loading an immediate value into.
4195 *
4196 * This will emit code to load the immediate, unless there happens to be an
4197 * unused register with the value already loaded.
4198 *
4199 * The caller will not modify the returned register; it must be considered
4200 * read-only. Free using iemNativeRegFreeTmpImm.
4201 *
4202 * @returns The host register number; throws VBox status code on failure, so no
4203 * need to check the return value.
4204 * @param pReNative The native recompile state.
4205 * @param poff Pointer to the variable with the code buffer position.
4206 * @param uImm The immediate value that the register must hold upon
4207 * return.
4208 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4209 * registers (@c true, default) or the other way around
4210 * (@c false).
4211 *
4212 * @note Reusing immediate values has not been implemented yet.
4213 */
4214DECL_HIDDEN_THROW(uint8_t)
4215iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4216{
4217 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4218 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4219 return idxReg;
4220}
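
/*
 * Editor's note: a usage sketch (not from the original source) for the immediate variant;
 * iemNativeRegFreeTmpImm is the release function named in the doc comment above, and the
 * immediate value is just an arbitrary example.
 *
 *     uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff800000000000));
 *     // ... use idxRegImm strictly as a read-only operand in emitted code ...
 *     iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */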
4221
4222
4223/**
4224 * Allocates a temporary host general purpose register for keeping a guest
4225 * register value.
4226 *
4227 * Since we may already have a register holding the guest register value,
4228 * code will be emitted to do the loading if that's not the case. Code may also
4229 * be emitted if we have to free up a register to satisfy the request.
4230 *
4231 * @returns The host register number; throws VBox status code on failure, so no
4232 * need to check the return value.
4233 * @param pReNative The native recompile state.
4234 * @param poff Pointer to the variable with the code buffer
4235 * position. This will be updated if we need to move a
4236 * variable from register to stack in order to satisfy
4237 * the request.
4238 * @param enmGstReg The guest register that is to be updated.
4239 * @param enmIntendedUse How the caller will be using the host register.
4240 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4241 * register is okay (default). The ASSUMPTION here is
4242 * that the caller has already flushed all volatile
4243 * registers, so this is only applied if we allocate a
4244 * new register.
4245 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4246 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4247 */
4248DECL_HIDDEN_THROW(uint8_t)
4249iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4250 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4251 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4252{
4253 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4254#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4255 AssertMsg( fSkipLivenessAssert
4256 || pReNative->idxCurCall == 0
4257 || enmGstReg == kIemNativeGstReg_Pc
4258 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4259 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4260 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4261 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4262 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4263 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4264#endif
4265 RT_NOREF(fSkipLivenessAssert);
4266#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4267 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4268#endif
4269 uint32_t const fRegMask = !fNoVolatileRegs
4270 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4271 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4272
4273 /*
4274 * First check if the guest register value is already in a host register.
4275 */
4276 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4277 {
4278 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4279 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4280 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4281 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4282
4283 /* It's not supposed to be allocated... */
4284 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4285 {
4286 /*
4287 * If the register will trash the guest shadow copy, try find a
4288 * completely unused register we can use instead. If that fails,
4289 * we need to disassociate the host reg from the guest reg.
4290 */
4291 /** @todo would be nice to know if preserving the register is in any way helpful. */
4292 /* If the purpose is calculations, try duplicate the register value as
4293 we'll be clobbering the shadow. */
4294 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4295 && ( ~pReNative->Core.bmHstRegs
4296 & ~pReNative->Core.bmHstRegsWithGstShadow
4297 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4298 {
4299 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4300
4301 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4302
4303 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4304 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4305 g_apszIemNativeHstRegNames[idxRegNew]));
4306 idxReg = idxRegNew;
4307 }
4308 /* If the current register matches the restrictions, go ahead and allocate
4309 it for the caller. */
4310 else if (fRegMask & RT_BIT_32(idxReg))
4311 {
4312 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4313 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4314 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4315 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4316 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4317 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4318 else
4319 {
4320 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4321 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4322 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4323 }
4324 }
4325 /* Otherwise, allocate a register that satisfies the caller and transfer
4326 the shadowing if compatible with the intended use. (This basically
4327 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4328 else
4329 {
4330 Assert(fNoVolatileRegs);
4331 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4332 !fNoVolatileRegs
4333 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4334 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4335 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4336 {
4337 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4338 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4339 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4340 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4341 }
4342 else
4343 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4344 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4345 g_apszIemNativeHstRegNames[idxRegNew]));
4346 idxReg = idxRegNew;
4347 }
4348 }
4349 else
4350 {
4351 /*
4352 * Oops. Shadowed guest register already allocated!
4353 *
4354 * Allocate a new register, copy the value and, if updating, the
4355 * guest shadow copy assignment to the new register.
4356 */
4357 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4358 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4359 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4360 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4361
4362 /** @todo share register for readonly access. */
4363 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4364 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4365
4366 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4367 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4368
4369 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4370 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4371 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4372 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4373 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4374 else
4375 {
4376 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4377 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4378 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4379 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4380 }
4381 idxReg = idxRegNew;
4382 }
4383 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4384
4385#ifdef VBOX_STRICT
4386 /* Strict builds: Check that the value is correct. */
4387 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4388#endif
4389
4390 return idxReg;
4391 }
4392
4393 /*
4394 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4395 */
4396 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4397
4398 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4399 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4400
4401 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4402 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4403 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4404 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4405
4406 return idxRegNew;
4407}
4408
4409
4410/**
4411 * Allocates a temporary host general purpose register that already holds the
4412 * given guest register value.
4413 *
4414 * The use case for this function is places where the shadowing state cannot be
4415 * modified due to branching and such. This will fail if we don't have a
4416 * current shadow copy handy or if it's incompatible. The only code that will
4417 * be emitted here is value checking code in strict builds.
4418 *
4419 * The intended use can only be readonly!
4420 *
4421 * @returns The host register number, UINT8_MAX if not present.
4422 * @param pReNative The native recompile state.
4423 * @param poff Pointer to the instruction buffer offset.
4424 * Will be updated in strict builds if a register is
4425 * found.
4426 * @param enmGstReg The guest register that is to be read.
4427 * @note In strict builds, this may throw instruction buffer growth failures.
4428 * Non-strict builds will not throw anything.
4429 * @sa iemNativeRegAllocTmpForGuestReg
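 *
 * @note  Illustrative (hypothetical) caller pattern only, not lifted from actual callers:
 * @code
 *      uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxPcReg != UINT8_MAX)
 *      {
 *          // ... emit read-only uses of idxPcReg ...
 *          iemNativeRegFreeTmp(pReNative, idxPcReg);
 *      }
 * @endcode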
4430 */
4431DECL_HIDDEN_THROW(uint8_t)
4432iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4433{
4434 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4435#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4436 AssertMsg( pReNative->idxCurCall == 0
4437 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4438 || enmGstReg == kIemNativeGstReg_Pc,
4439 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4440#endif
4441
4442 /*
4443 * First check if the guest register value is already in a host register.
4444 */
4445 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4446 {
4447 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4448 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4449 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4450 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4451
4452 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4453 {
4454 /*
4455 * We only do readonly use here, so easy compared to the other
4456 * variant of this code.
4457 */
4458 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4459 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4460 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4461 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4462 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4463
4464#ifdef VBOX_STRICT
4465 /* Strict builds: Check that the value is correct. */
4466 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4467#else
4468 RT_NOREF(poff);
4469#endif
4470 return idxReg;
4471 }
4472 }
4473
4474 return UINT8_MAX;
4475}
4476
4477
4478/**
4479 * Allocates argument registers for a function call.
4480 *
4481 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4482 * need to check the return value.
4483 * @param pReNative The native recompile state.
4484 * @param off The current code buffer offset.
4485 * @param cArgs The number of arguments the function call takes.
4486 */
4487DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4488{
4489 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4490 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4491 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4492 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4493
4494 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4495 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4496 else if (cArgs == 0)
4497 return off;
4498
4499 /*
4500 * Do we get lucky and all the registers are free and not shadowing anything?
4501 */
4502 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4503 for (uint32_t i = 0; i < cArgs; i++)
4504 {
4505 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4506 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4507 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4508 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4509 }
4510 /*
4511 * Okay, not lucky so we have to free up the registers.
4512 */
4513 else
4514 for (uint32_t i = 0; i < cArgs; i++)
4515 {
4516 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4517 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4518 {
4519 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4520 {
4521 case kIemNativeWhat_Var:
4522 {
4523 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4524 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4525 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4526 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4527 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4528#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4529 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4530#endif
4531
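                    /* Non-stack variables simply lose their register association here,
                       whereas stack variables are moved to another register or spilled
                       to their stack slot. */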
4532 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4533 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4534 else
4535 {
4536 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4537 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4538 }
4539 break;
4540 }
4541
4542 case kIemNativeWhat_Tmp:
4543 case kIemNativeWhat_Arg:
4544 case kIemNativeWhat_rc:
4545 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4546 default:
4547 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4548 }
4549
4550 }
4551 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4552 {
4553 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4554 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4555 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4556 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4557 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4558 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4559 }
4560 else
4561 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4562 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4563 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4564 }
4565 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4566 return off;
4567}
4568
4569
4570DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4571
4572
4573#if 0
4574/**
4575 * Frees a register assignment of any type.
4576 *
4577 * @param pReNative The native recompile state.
4578 * @param idxHstReg The register to free.
4579 *
4580 * @note Does not update variables.
4581 */
4582DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4583{
4584 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4585 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4586 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4587 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4588 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4589 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4590 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4591 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4592 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4593 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4594 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4595 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4596 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4597 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4598
4599 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4600 /* no flushing, right:
4601 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4602 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4603 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4604 */
4605}
4606#endif
4607
4608
4609/**
4610 * Frees a temporary register.
4611 *
4612 * Any shadow copies of guest registers assigned to the host register will not
4613 * be flushed by this operation.
4614 */
4615DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4616{
4617 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4618 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4619 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4620 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4621 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4622}
4623
4624
4625/**
4626 * Frees a temporary immediate register.
4627 *
4628 * It is assumed that the call has not modified the register, so it still holds
4629 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4630 */
4631DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4632{
4633 iemNativeRegFreeTmp(pReNative, idxHstReg);
4634}
4635
4636
4637/**
4638 * Frees a register assigned to a variable.
4639 *
4640 * The register will be disassociated from the variable.
4641 */
4642DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4643{
4644 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4645 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4646 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4647 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4648 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4649#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4650 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4651#endif
4652
4653 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4654 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4655 if (!fFlushShadows)
4656 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4657 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4658 else
4659 {
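        /* Also flush the guest register shadowing: drop every guest register shadow
           held by this host register and clear the reverse (guest -> host) mapping. */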
4660 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4661 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4662 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4663 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4664 uint64_t fGstRegShadows = fGstRegShadowsOld;
4665 while (fGstRegShadows)
4666 {
4667 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4668 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4669
4670 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4671 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4672 }
4673 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4674 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4675 }
4676}
4677
4678
4679#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4680# ifdef LOG_ENABLED
4681/** Host CPU SIMD register names. */
4682DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4683{
4684# ifdef RT_ARCH_AMD64
4685 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4686# elif defined(RT_ARCH_ARM64)
4687 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4688 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4689# else
4690# error "port me"
4691# endif
4692};
4693# endif
4694
4695
4696/**
4697 * Frees a SIMD register assigned to a variable.
4698 *
4699 * The register will be disassociated from the variable.
4700 */
4701DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4702{
4703 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4704 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4705 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4706 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4707 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4708 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4709
4710 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4711 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4712 if (!fFlushShadows)
4713 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4714 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4715 else
4716 {
4717 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4718 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4719 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4720 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4721 uint64_t fGstRegShadows = fGstRegShadowsOld;
4722 while (fGstRegShadows)
4723 {
4724 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4725 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4726
4727 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4728 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4729 }
4730 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4731 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4732 }
4733}
4734#endif
4735
4736
4737/**
4738 * Called right before emitting a call instruction to move anything important
4739 * out of call-volatile registers, free and flush the call-volatile registers,
4740 * optionally freeing argument variables.
4741 *
4742 * @returns New code buffer offset, UINT32_MAX on failure.
4743 * @param pReNative The native recompile state.
4744 * @param off The code buffer offset.
4745 * @param cArgs The number of arguments the function call takes.
4746 * It is presumed that the host register part of these has
4747 * been allocated as such already and won't need moving,
4748 * just freeing.
4749 * @param fKeepVars Mask of variables that should keep their register
4750 * assignments. Caller must take care to handle these.
4751 */
4752DECL_HIDDEN_THROW(uint32_t)
4753iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4754{
4755 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4756
4757 /* fKeepVars will reduce this mask. */
4758 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4759
4760 /*
4761 * Move anything important out of volatile registers.
4762 */
4763 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4764 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4765 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4766#ifdef IEMNATIVE_REG_FIXED_TMP0
4767 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4768#endif
4769#ifdef IEMNATIVE_REG_FIXED_TMP1
4770 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4771#endif
4772#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4773 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4774#endif
4775 & ~g_afIemNativeCallRegs[cArgs];
4776
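    /* The fixed temporaries / debug PC register (when defined) and the registers already
       set up as call arguments were excluded above; of what remains, only the registers
       that are currently allocated need moving. */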
4777 fRegsToMove &= pReNative->Core.bmHstRegs;
4778 if (!fRegsToMove)
4779 { /* likely */ }
4780 else
4781 {
4782 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4783 while (fRegsToMove != 0)
4784 {
4785 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4786 fRegsToMove &= ~RT_BIT_32(idxReg);
4787
4788 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4789 {
4790 case kIemNativeWhat_Var:
4791 {
4792 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4793 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4794 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4795 Assert(pVar->idxReg == idxReg);
4796 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4797 {
4798 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4799 idxVar, pVar->enmKind, pVar->idxReg));
4800 if (pVar->enmKind != kIemNativeVarKind_Stack)
4801 pVar->idxReg = UINT8_MAX;
4802 else
4803 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4804 }
4805 else
4806 fRegsToFree &= ~RT_BIT_32(idxReg);
4807 continue;
4808 }
4809
4810 case kIemNativeWhat_Arg:
4811 AssertMsgFailed(("What?!?: %u\n", idxReg));
4812 continue;
4813
4814 case kIemNativeWhat_rc:
4815 case kIemNativeWhat_Tmp:
4816 AssertMsgFailed(("Missing free: %u\n", idxReg));
4817 continue;
4818
4819 case kIemNativeWhat_FixedTmp:
4820 case kIemNativeWhat_pVCpuFixed:
4821 case kIemNativeWhat_pCtxFixed:
4822 case kIemNativeWhat_PcShadow:
4823 case kIemNativeWhat_FixedReserved:
4824 case kIemNativeWhat_Invalid:
4825 case kIemNativeWhat_End:
4826 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4827 }
4828 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4829 }
4830 }
4831
4832 /*
4833 * Do the actual freeing.
4834 */
4835 if (pReNative->Core.bmHstRegs & fRegsToFree)
4836 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4837 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4838 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4839
4840 /* If there are guest register shadows in any call-volatile register, we
4841 have to clear the corresponding guest register masks for each register. */
4842 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4843 if (fHstRegsWithGstShadow)
4844 {
4845 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4846 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4847 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4848 do
4849 {
4850 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4851 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4852
4853 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4854 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4855 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4856 } while (fHstRegsWithGstShadow != 0);
4857 }
4858
4859 return off;
4860}
4861
4862
4863/**
4864 * Flushes a set of guest register shadow copies.
4865 *
4866 * This is usually done after calling a threaded function or a C-implementation
4867 * of an instruction.
4868 *
4869 * @param pReNative The native recompile state.
4870 * @param fGstRegs Set of guest registers to flush.
4871 */
4872DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4873{
4874 /*
4875 * Reduce the mask by what's currently shadowed
4876 */
4877 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4878 fGstRegs &= bmGstRegShadowsOld;
4879 if (fGstRegs)
4880 {
4881 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4882 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4883 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4884 if (bmGstRegShadowsNew)
4885 {
4886 /*
4887 * Partial.
4888 */
4889 do
4890 {
4891 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4892 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4893 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4894 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4895 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4896
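                /* Clear every requested guest register shadowed by this host register in one go. */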
4897 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4898 fGstRegs &= ~fInThisHstReg;
4899 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4900 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4901 if (!fGstRegShadowsNew)
4902 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4903 } while (fGstRegs != 0);
4904 }
4905 else
4906 {
4907 /*
4908 * Clear all.
4909 */
4910 do
4911 {
4912 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4913 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4914 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4915 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4916 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4917
4918 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4919 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4920 } while (fGstRegs != 0);
4921 pReNative->Core.bmHstRegsWithGstShadow = 0;
4922 }
4923 }
4924}
4925
4926
4927/**
4928 * Flushes guest register shadow copies held by a set of host registers.
4929 *
4930 * This is used with the TLB lookup code for ensuring that we don't carry on
4931 * with any guest shadows in volatile registers, as these will get corrupted by
4932 * a TLB miss.
4933 *
4934 * @param pReNative The native recompile state.
4935 * @param fHstRegs Set of host registers to flush guest shadows for.
4936 */
4937DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4938{
4939 /*
4940 * Reduce the mask by what's currently shadowed.
4941 */
4942 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4943 fHstRegs &= bmHstRegsWithGstShadowOld;
4944 if (fHstRegs)
4945 {
4946 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4947 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4948 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4949 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4950 if (bmHstRegsWithGstShadowNew)
4951 {
4952 /*
4953 * Partial (likely).
4954 */
4955 uint64_t fGstShadows = 0;
4956 do
4957 {
4958 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4959 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4960 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4961 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4962
4963 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4964 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4965 fHstRegs &= ~RT_BIT_32(idxHstReg);
4966 } while (fHstRegs != 0);
4967 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4968 }
4969 else
4970 {
4971 /*
4972 * Clear all.
4973 */
4974 do
4975 {
4976 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4977 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4978 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4979 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4980
4981 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4982 fHstRegs &= ~RT_BIT_32(idxHstReg);
4983 } while (fHstRegs != 0);
4984 pReNative->Core.bmGstRegShadows = 0;
4985 }
4986 }
4987}
4988
4989
4990/**
4991 * Restores guest shadow copies in volatile registers.
4992 *
4993 * This is used after calling a helper function (think TLB miss) to restore the
4994 * register state of volatile registers.
4995 *
4996 * @param pReNative The native recompile state.
4997 * @param off The code buffer offset.
4998 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4999 * be active (allocated) w/o asserting. Hack.
5000 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5001 * iemNativeVarRestoreVolatileRegsPostHlpCall()
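 *
 * @note  Illustrative only (not lifted from actual callers): the typical sequence is to
 *        save/flush the volatile registers, emit the helper call, and then invoke this
 *        function to reload the guest shadow copies that lived in volatile registers.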
5002 */
5003DECL_HIDDEN_THROW(uint32_t)
5004iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5005{
5006 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5007 if (fHstRegs)
5008 {
5009 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5010 do
5011 {
5012 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5013
5014 /* It's not fatal if a register is active holding a variable that
5015 shadows a guest register, ASSUMING all pending guest register
5016 writes were flushed prior to the helper call. However, we'll be
5017 emitting duplicate restores, so it wastes code space. */
5018 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5019 RT_NOREF(fHstRegsActiveShadows);
5020
5021 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5022 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5023 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5024 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5025
5026 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5027 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5028
5029 fHstRegs &= ~RT_BIT_32(idxHstReg);
5030 } while (fHstRegs != 0);
5031 }
5032 return off;
5033}
5034
5035
5036
5037
5038/*********************************************************************************************************************************
5039* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5040*********************************************************************************************************************************/
5041#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5042
5043/**
5044 * Info about shadowed guest SIMD register values.
5045 * @see IEMNATIVEGSTSIMDREG
5046 */
5047static struct
5048{
5049 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5050 uint32_t offXmm;
5051 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5052 uint32_t offYmm;
5053 /** Name (for logging). */
5054 const char *pszName;
5055} const g_aGstSimdShadowInfo[] =
5056{
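/* Note: The macro below expands to the offXmm and offYmm initializers for one register;
   the low 128 bits live in XState.x87.aXMM[] and the high 128 bits in XState.u.YmmHi.aYmmHi[]. */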
5057#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5058 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5059 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5060 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5061 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5062 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5063 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5064 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5065 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5066 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5067 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5068 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5069 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5070 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5071 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5072 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5073 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5074 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5075#undef CPUMCTX_OFF_AND_SIZE
5076};
5077AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5078
5079
5080/**
5081 * Frees a temporary SIMD register.
5082 *
5083 * Any shadow copies of guest registers assigned to the host register will not
5084 * be flushed by this operation.
5085 */
5086DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5087{
5088 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5089 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5090 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5091 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5092 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5093}
5094
5095
5096/**
5097 * Emits code to flush a pending write of the given SIMD register if any, clearing its dirty state.
5098 *
5099 * @returns New code buffer offset.
5100 * @param pReNative The native recompile state.
5101 * @param off Current code buffer position.
5102 * @param enmGstSimdReg The guest SIMD register to flush.
5103 */
5104DECL_HIDDEN_THROW(uint32_t)
5105iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5106{
5107 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5108
5109 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5110 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5111 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5112 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5113
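    /* The low and high 128-bit halves are tracked separately; only the halves
       marked dirty are written back to their CPUMCTX locations. */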
5114 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5115 {
5116 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5117 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5118 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5119 }
5120
5121 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5122 {
5123 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5124 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5125 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5126 }
5127
5128 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5129 return off;
5130}
5131
5132
5133/**
5134 * Locate a register, possibly freeing one up.
5135 *
5136 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5137 * failed.
5138 *
5139 * @returns Host register number on success. Returns UINT8_MAX if no registers
5140 * found, the caller is supposed to deal with this and raise a
5141 * allocation type specific status code (if desired).
5142 *
5143 * @throws VBox status code if we run into trouble spilling a variable or
5144 * recording debug info. Does NOT throw anything if we're out of
5145 * registers, though.
5146 */
5147static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5148 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5149{
5150 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5151 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5152 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5153
5154 /*
5155 * Try a freed register that's shadowing a guest register.
5156 */
5157 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5158 if (fRegs)
5159 {
5160 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5161
5162#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5163 /*
5164 * When we have liveness information, we use it to kick out all shadowed
5165 * guest registers that will not be needed any more in this TB. If we're
5166 * lucky, this may prevent us from ending up here again.
5167 *
5168 * Note! We must consider the previous entry here so we don't free
5169 * anything that the current threaded function requires (current
5170 * entry is produced by the next threaded function).
5171 */
5172 uint32_t const idxCurCall = pReNative->idxCurCall;
5173 if (idxCurCall > 0)
5174 {
5175 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5176
5177# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5178 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5179 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5180 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5181#else
5182 /* Construct a mask of the registers not in the read or write state.
5183 Note! We could skip writes, if they aren't from us, as this is just
5184 a hack to prevent trashing registers that have just been written
5185 or will be written when we retire the current instruction. */
5186 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5187 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5188 & IEMLIVENESSBIT_MASK;
5189#endif
5190 /* If it matches any shadowed registers. */
5191 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5192 {
5193 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5194 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5195 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5196
5197 /* See if we've got any unshadowed registers we can return now. */
5198 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5199 if (fUnshadowedRegs)
5200 {
5201 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5202 return (fPreferVolatile
5203 ? ASMBitFirstSetU32(fUnshadowedRegs)
5204 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5205 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5206 - 1;
5207 }
5208 }
5209 }
5210#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5211
5212 unsigned const idxReg = (fPreferVolatile
5213 ? ASMBitFirstSetU32(fRegs)
5214 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5215 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5216 - 1;
5217
5218 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5219 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5220 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5221 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5222
5223 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5224 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5225 uint32_t idxGstSimdReg = 0;
5226 do
5227 {
5228 if (fGstRegShadows & 0x1)
5229 {
5230 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5231 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5232 }
5233 idxGstSimdReg++;
5234 fGstRegShadows >>= 1;
5235 } while (fGstRegShadows);
5236
5237 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5238 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5239 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5240 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5241 return idxReg;
5242 }
5243
5244 /*
5245 * Try free up a variable that's in a register.
5246 *
5247 * We do two rounds here, first evacuating variables we don't need to be
5248 * saved on the stack, then in the second round move things to the stack.
5249 */
5250 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5251 AssertReleaseFailed(); /** @todo No variable support right now. */
5252#if 0
5253 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5254 {
5255 uint32_t fVars = pReNative->Core.bmSimdVars;
5256 while (fVars)
5257 {
5258 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5259 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5260 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5261 && (RT_BIT_32(idxReg) & fRegMask)
5262 && ( iLoop == 0
5263 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5264 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5265 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5266 {
5267 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5268 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5269 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5270 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5271 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5272 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5273
5274 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5275 {
5276 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5277 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5278 }
5279
5280 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5281 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5282
5283 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5284 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5285 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5286 return idxReg;
5287 }
5288 fVars &= ~RT_BIT_32(idxVar);
5289 }
5290 }
5291#endif
5292
5293 AssertFailed();
5294 return UINT8_MAX;
5295}
5296
5297
5298/**
5299 * Flushes a set of guest SIMD register shadow copies.
5300 *
5301 * This is usually done after calling a threaded function or a C-implementation
5302 * of an instruction.
5303 *
5304 * @param pReNative The native recompile state.
5305 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5306 */
5307DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5308{
5309 /*
5310 * Reduce the mask by what's currently shadowed
5311 */
5312 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5313 fGstSimdRegs &= bmGstSimdRegShadows;
5314 if (fGstSimdRegs)
5315 {
5316 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5317 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5318 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5319 if (bmGstSimdRegShadowsNew)
5320 {
5321 /*
5322 * Partial.
5323 */
5324 do
5325 {
5326 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5327 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5328 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5329 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5330 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5331 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5332
5333 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5334 fGstSimdRegs &= ~fInThisHstReg;
5335 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5336 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5337 if (!fGstRegShadowsNew)
5338 {
5339 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5340 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5341 }
5342 } while (fGstSimdRegs != 0);
5343 }
5344 else
5345 {
5346 /*
5347 * Clear all.
5348 */
5349 do
5350 {
5351 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5352 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5353 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5354 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5355 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5356 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5357
5358 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5359 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5360 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5361 } while (fGstSimdRegs != 0);
5362 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5363 }
5364 }
5365}
5366
5367
5368/**
5369 * Allocates a temporary host SIMD register.
5370 *
5371 * This may emit code to save register content onto the stack in order to free
5372 * up a register.
5373 *
5374 * @returns The host register number; throws VBox status code on failure,
5375 * so no need to check the return value.
5376 * @param pReNative The native recompile state.
5377 * @param poff Pointer to the variable with the code buffer position.
5378 * This will be updated if we need to move a variable from
5379 * register to stack in order to satisfy the request.
5380 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5381 * registers (@c true, default) or the other way around
5382 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
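 *
 * @note  Illustrative (hypothetical) usage only:
 * @code
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit code that uses idxSimdTmp as scratch ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 * @endcode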
5383 */
5384DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5385{
5386 /*
5387 * Try find a completely unused register, preferably a call-volatile one.
5388 */
5389 uint8_t idxSimdReg;
5390 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5391 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5392 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5393 if (fRegs)
5394 {
5395 if (fPreferVolatile)
5396 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5397 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5398 else
5399 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5400 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5401 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5402 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5403 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5404 }
5405 else
5406 {
5407 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5408 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5409 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5410 }
5411
5412 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5413 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5414}
5415
5416
5417/**
5418 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5419 * registers.
5420 *
5421 * @returns The host register number; throws VBox status code on failure,
5422 * so no need to check the return value.
5423 * @param pReNative The native recompile state.
5424 * @param poff Pointer to the variable with the code buffer position.
5425 * This will be updated if we need to move a variable from
5426 * register to stack in order to satisfy the request.
5427 * @param fRegMask Mask of acceptable registers.
5428 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5429 * registers (@c true, default) or the other way around
5430 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5431 */
5432DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5433 bool fPreferVolatile /*= true*/)
5434{
5435 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5436 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5437
5438 /*
5439 * Try find a completely unused register, preferably a call-volatile one.
5440 */
5441 uint8_t idxSimdReg;
5442 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5443 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5444 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5445 & fRegMask;
5446 if (fRegs)
5447 {
5448 if (fPreferVolatile)
5449 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5450 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5451 else
5452 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5453 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5454 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5455 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5456 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5457 }
5458 else
5459 {
5460 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5461 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5462 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5463 }
5464
5465 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5466 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5467}
5468
5469
5470/**
5471 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5472 *
5473 * @param pReNative The native recompile state.
5474 * @param idxHstSimdReg The host SIMD register to update the state for.
5475 * @param enmLoadSz The load size to set.
5476 */
5477DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5478 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5479{
5480 /* Everything valid already? -> nothing to do. */
5481 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5482 return;
5483
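    /* Nothing valid yet: simply record the given size. Otherwise the assertion below
       ensures we are combining the low and high 128-bit halves, which together make
       the full 256 bits valid. */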
5484 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5485 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5486 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5487 {
5488 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5489 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5490 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5491 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5492 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5493 }
5494}
5495
5496
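/**
 * Copies a guest SIMD value between two host SIMD registers according to the
 * requested load size, provided the source already holds the required range.
 *
 * @returns New code buffer offset.
 * @param   pReNative           The native recompile state.
 * @param   off                 The current code buffer position.
 * @param   idxHstSimdRegDst    The destination host SIMD register.
 * @param   idxHstSimdRegSrc    The source host SIMD register.
 * @param   enmLoadSzDst        The load size the destination is required to have valid.
 */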
5497static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5498 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5499{
5500 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5501 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5502 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5503 {
5504# ifdef RT_ARCH_ARM64
5505 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5506 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5507# endif
5508
5509 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5510 {
5511 switch (enmLoadSzDst)
5512 {
5513 case kIemNativeGstSimdRegLdStSz_256:
5514 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5515 break;
5516 case kIemNativeGstSimdRegLdStSz_Low128:
5517 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5518 break;
5519 case kIemNativeGstSimdRegLdStSz_High128:
5520 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5521 break;
5522 default:
5523 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5524 }
5525
5526 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5527 }
5528 }
5529 else
5530 {
5531 /* Complicated stuff where the source is currently missing something, later. */
5532 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5533 }
5534
5535 return off;
5536}
5537
5538
5539/**
5540 * Allocates a temporary host SIMD register for keeping a guest
5541 * SIMD register value.
5542 *
5543 * Since we may already have a register holding the guest register value,
5544 * code will be emitted to do the loading if that's not the case. Code may also
5545 * be emitted if we have to free up a register to satisfy the request.
5546 *
5547 * @returns The host register number; throws VBox status code on failure, so no
5548 * need to check the return value.
5549 * @param pReNative The native recompile state.
5550 * @param poff Pointer to the variable with the code buffer
5551 * position. This will be updated if we need to move a
5552 * variable from register to stack in order to satisfy
5553 * the request.
5554 * @param enmGstSimdReg The guest SIMD register that is to be updated.
 * @param enmLoadSz Which part of the register needs to hold valid data (low 128 bits, high 128 bits or the full 256 bits).
5555 * @param enmIntendedUse How the caller will be using the host register.
5556 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5557 * register is okay (default). The ASSUMPTION here is
5558 * that the caller has already flushed all volatile
5559 * registers, so this is only applied if we allocate a
5560 * new register.
5561 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5562 */
5563DECL_HIDDEN_THROW(uint8_t)
5564iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5565 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5566 bool fNoVolatileRegs /*= false*/)
5567{
5568 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5569#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5570 AssertMsg( pReNative->idxCurCall == 0
5571 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5572 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5573 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5574 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5575 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5576 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5577#endif
5578#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5579 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5580#endif
5581 uint32_t const fRegMask = !fNoVolatileRegs
5582 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5583 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5584
5585 /*
5586 * First check if the guest register value is already in a host register.
5587 */
5588 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5589 {
5590 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5591 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5592 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5593 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5594
5595 /* It's not supposed to be allocated... */
5596 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5597 {
5598 /*
5599 * If the register will trash the guest shadow copy, try find a
5600 * completely unused register we can use instead. If that fails,
5601 * we need to disassociate the host reg from the guest reg.
5602 */
5603 /** @todo would be nice to know if preserving the register is in any way helpful. */
5604 /* If the purpose is calculations, try duplicate the register value as
5605 we'll be clobbering the shadow. */
5606 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5607 && ( ~pReNative->Core.bmHstSimdRegs
5608 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5609 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5610 {
5611 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5612
5613 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5614
5615 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5616 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5617 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5618 idxSimdReg = idxRegNew;
5619 }
5620 /* If the current register matches the restrictions, go ahead and allocate
5621 it for the caller. */
5622 else if (fRegMask & RT_BIT_32(idxSimdReg))
5623 {
5624 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5625 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5626 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5627 {
5628 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5629 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5630 else
5631 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5632 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5633 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5634 }
5635 else
5636 {
5637 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5638 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5639 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5640 }
5641 }
5642 /* Otherwise, allocate a register that satisfies the caller and transfer
5643 the shadowing if compatible with the intended use. (This basically
5644 means the call wants a non-volatile register (RSP push/pop scenario).) */
5645 else
5646 {
5647 Assert(fNoVolatileRegs);
5648 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5649 !fNoVolatileRegs
5650 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5651 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5652 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5653 {
5654 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5655 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5656 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5657 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5658 }
5659 else
5660 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5661 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5662 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5663 idxSimdReg = idxRegNew;
5664 }
5665 }
5666 else
5667 {
5668 /*
5669 * Oops. Shadowed guest register already allocated!
5670 *
5671 * Allocate a new register, copy the value and, if updating, the
5672 * guest shadow copy assignment to the new register.
5673 */
5674 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5675 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5676 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5677 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5678
5679 /** @todo share register for readonly access. */
5680 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5681 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5682
5683 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5684 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5685 else
5686 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5687
5688 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5689 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5690 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5691 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5692 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5693 else
5694 {
5695 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5696 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5697 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5698 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5699 }
5700 idxSimdReg = idxRegNew;
5701 }
5702 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5703
5704#ifdef VBOX_STRICT
5705 /* Strict builds: Check that the value is correct. */
5706 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5707 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5708#endif
5709
5710 return idxSimdReg;
5711 }
5712
5713 /*
5714 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5715 */
5716 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5717
5718 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5719 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5720 else
5721 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5722
5723 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5724 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5725
5726 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5727 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5728
5729 return idxRegNew;
5730}
5731
5732#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5733
5734
5735
5736/*********************************************************************************************************************************
5737* Code emitters for flushing pending guest register writes and sanity checks *
5738*********************************************************************************************************************************/
5739
5740#ifdef VBOX_STRICT
5741/**
5742 * Does internal register allocator sanity checks.
5743 */
5744DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5745{
5746 /*
5747 * Iterate host registers building a guest shadowing set.
5748 */
5749 uint64_t bmGstRegShadows = 0;
5750 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5751 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5752 while (bmHstRegsWithGstShadow)
5753 {
5754 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5755 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5756 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5757
5758 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5759 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5760 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5761 bmGstRegShadows |= fThisGstRegShadows;
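/* Each guest register shadowed by this host register must point back at it in the guest-to-host index array. */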
5762 while (fThisGstRegShadows)
5763 {
5764 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5765 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5766 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5767 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5768 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5769 }
5770 }
5771 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5772 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5773 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5774
5775 /*
5776 * Now the other way around, checking the guest to host index array.
5777 */
5778 bmHstRegsWithGstShadow = 0;
5779 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5780 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5781 while (bmGstRegShadows)
5782 {
5783 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5784 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5785 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5786
5787 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5788 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5789 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5790 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5791 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5792 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5793 }
5794 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5795 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5796 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5797}
5798#endif /* VBOX_STRICT */
5799
5800
5801/**
5802 * Flushes any delayed guest register writes.
5803 *
5804 * This must be called prior to calling CImpl functions and any helpers that use
5805 * the guest state (like raising exceptions) and such.
5806 *
5807 * Currently this covers delayed RIP updates (when IEMNATIVE_WITH_DELAYED_PC_UPDATING is
5808 * defined) and dirty guest SIMD register shadows (when IEMNATIVE_WITH_SIMD_REG_ALLOCATOR is defined).
5809 */
5810DECL_HIDDEN_THROW(uint32_t)
5811iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5812{
5813#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5814 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5815 off = iemNativeEmitPcWriteback(pReNative, off);
5816#else
5817 RT_NOREF(pReNative, fGstShwExcept);
5818#endif
5819
5820#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5821 /** @todo r=bird: There must be a quicker way to check if anything needs
5822 * doing and then call simd function to do the flushing */
5823 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
5824 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5825 {
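/* Write back any dirty shadow copy of this guest SIMD register and, when asked to, drop the shadowing afterwards. */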
5826 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5827 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5828
5829 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5830 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5831
5832 if ( fFlushShadows
5833 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5834 {
5835 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5836
5837 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5838 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5839 }
5840 }
5841#else
5842 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5843#endif
5844
5845 return off;
5846}
5847
5848
5849#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5850/**
5851 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5852 */
5853DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5854{
5855 Assert(pReNative->Core.offPc);
5856# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5857 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5858 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5859# endif
5860
5861# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5862 /* Allocate a temporary PC register. */
5863 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5864
5865 /* Perform the addition and store the result. */
5866 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5867 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5868
5869 /* Free but don't flush the PC register. */
5870 iemNativeRegFreeTmp(pReNative, idxPcReg);
5871# else
5872 /* Compare the shadow with the context value, they should match. */
5873 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5874 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5875# endif
5876
5877 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5878 pReNative->Core.offPc = 0;
5879 pReNative->Core.cInstrPcUpdateSkipped = 0;
5880
5881 return off;
5882}
5883#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5884
5885
5886/*********************************************************************************************************************************
5887* Code Emitters (larger snippets) *
5888*********************************************************************************************************************************/
5889
5890/**
5891 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5892 * extending to 64-bit width.
5893 *
5894 * @returns New code buffer offset on success, UINT32_MAX on failure.
5895 * @param pReNative The recompiler state.
5896 * @param off The current code buffer position.
5897 * @param idxHstReg The host register to load the guest register value into.
5898 * @param enmGstReg The guest register to load.
5899 *
5900 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5901 * that is something the caller needs to do if applicable.
5902 */
5903DECL_HIDDEN_THROW(uint32_t)
5904iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5905{
5906 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5907 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5908
5909 switch (g_aGstShadowInfo[enmGstReg].cb)
5910 {
5911 case sizeof(uint64_t):
5912 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5913 case sizeof(uint32_t):
5914 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5915 case sizeof(uint16_t):
5916 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5917#if 0 /* not present in the table. */
5918 case sizeof(uint8_t):
5919 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5920#endif
5921 default:
5922 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5923 }
5924}
5925
5926
5927#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5928/**
5929 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5930 *
5931 * @returns New code buffer offset on success, UINT32_MAX on failure.
5932 * @param pReNative The recompiler state.
5933 * @param off The current code buffer position.
5934 * @param idxHstSimdReg The host register to load the guest register value into.
5935 * @param enmGstSimdReg The guest register to load.
5936 * @param enmLoadSz The load size of the register.
5937 *
5938 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5939 * that is something the caller needs to do if applicable.
5940 */
5941DECL_HIDDEN_THROW(uint32_t)
5942iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5943 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5944{
5945 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5946
5947 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5948 switch (enmLoadSz)
5949 {
5950 case kIemNativeGstSimdRegLdStSz_256:
5951 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5952 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5953 case kIemNativeGstSimdRegLdStSz_Low128:
5954 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5955 case kIemNativeGstSimdRegLdStSz_High128:
5956 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5957 default:
5958 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5959 }
5960}
5961#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5962
5963#ifdef VBOX_STRICT
5964
5965/**
5966 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5967 *
5968 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5969 * Trashes EFLAGS on AMD64.
5970 */
5971DECL_HIDDEN_THROW(uint32_t)
5972iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5973{
5974# ifdef RT_ARCH_AMD64
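/* Strategy: rotate the upper 32 bits into the lower half, test them, trap with int3 if any are set, then rotate back to restore the original value. */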
5975 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5976
5977 /* rol reg64, 32 */
5978 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5979 pbCodeBuf[off++] = 0xc1;
5980 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5981 pbCodeBuf[off++] = 32;
5982
5983 /* test reg32, ffffffffh */
5984 if (idxReg >= 8)
5985 pbCodeBuf[off++] = X86_OP_REX_B;
5986 pbCodeBuf[off++] = 0xf7;
5987 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5988 pbCodeBuf[off++] = 0xff;
5989 pbCodeBuf[off++] = 0xff;
5990 pbCodeBuf[off++] = 0xff;
5991 pbCodeBuf[off++] = 0xff;
5992
5993 /* je/jz +1 */
5994 pbCodeBuf[off++] = 0x74;
5995 pbCodeBuf[off++] = 0x01;
5996
5997 /* int3 */
5998 pbCodeBuf[off++] = 0xcc;
5999
6000 /* rol reg64, 32 */
6001 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6002 pbCodeBuf[off++] = 0xc1;
6003 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6004 pbCodeBuf[off++] = 32;
6005
6006# elif defined(RT_ARCH_ARM64)
6007 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6008 /* lsr tmp0, reg64, #32 */
6009 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6010 /* cbz tmp0, +1 */
6011 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6012 /* brk #0x1100 */
6013 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6014
6015# else
6016# error "Port me!"
6017# endif
6018 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6019 return off;
6020}
6021
6022
6023/**
6024 * Emitting code that checks that the content of register @a idxReg is the same
6025 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6026 * instruction if that's not the case.
6027 *
6028 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6029 * Trashes EFLAGS on AMD64.
6030 */
6031DECL_HIDDEN_THROW(uint32_t)
6032iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6033{
6034# ifdef RT_ARCH_AMD64
6035 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6036
6037 /* cmp reg, [mem] */
6038 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6039 {
6040 if (idxReg >= 8)
6041 pbCodeBuf[off++] = X86_OP_REX_R;
6042 pbCodeBuf[off++] = 0x38;
6043 }
6044 else
6045 {
6046 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6047 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6048 else
6049 {
6050 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6051 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6052 else
6053 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6054 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6055 if (idxReg >= 8)
6056 pbCodeBuf[off++] = X86_OP_REX_R;
6057 }
6058 pbCodeBuf[off++] = 0x39;
6059 }
6060 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6061
6062 /* je/jz +1 */
6063 pbCodeBuf[off++] = 0x74;
6064 pbCodeBuf[off++] = 0x01;
6065
6066 /* int3 */
6067 pbCodeBuf[off++] = 0xcc;
6068
6069 /* For values smaller than the register size, we must check that the rest
6070 of the register is all zeros. */
6071 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6072 {
6073 /* test reg64, imm32 */
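/* The immediate has zero bytes covering the guest value's width and 0xff above it, so only the bits that must be clear get tested (the sign-extended imm32 also covers bits 32..63). */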
6074 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6075 pbCodeBuf[off++] = 0xf7;
6076 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6077 pbCodeBuf[off++] = 0;
6078 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6079 pbCodeBuf[off++] = 0xff;
6080 pbCodeBuf[off++] = 0xff;
6081
6082 /* je/jz +1 */
6083 pbCodeBuf[off++] = 0x74;
6084 pbCodeBuf[off++] = 0x01;
6085
6086 /* int3 */
6087 pbCodeBuf[off++] = 0xcc;
6088 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6089 }
6090 else
6091 {
6092 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6093 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6094 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6095 }
6096
6097# elif defined(RT_ARCH_ARM64)
6098 /* mov TMP0, [gstreg] */
6099 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6100
6101 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6102 /* sub tmp0, tmp0, idxReg */
6103 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6104 /* cbz tmp0, +1 */
6105 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6106 /* brk #0x1000+enmGstReg */
6107 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6108 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6109
6110# else
6111# error "Port me!"
6112# endif
6113 return off;
6114}
6115
6116
6117# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6118# ifdef RT_ARCH_AMD64
6119/**
6120 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6121 */
6122DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6123{
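/* PCMPEQQ sets each 64-bit lane to all ones when it matches the value in CPUMCTX; extract both lanes with PEXTRQ and trap with int3 if either is not all ones. */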
6124 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6125 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6126 if (idxSimdReg >= 8)
6127 pbCodeBuf[off++] = X86_OP_REX_R;
6128 pbCodeBuf[off++] = 0x0f;
6129 pbCodeBuf[off++] = 0x38;
6130 pbCodeBuf[off++] = 0x29;
6131 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6132
6133 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6134 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6135 pbCodeBuf[off++] = X86_OP_REX_W
6136 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6137 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6138 pbCodeBuf[off++] = 0x0f;
6139 pbCodeBuf[off++] = 0x3a;
6140 pbCodeBuf[off++] = 0x16;
6141 pbCodeBuf[off++] = 0xeb;
6142 pbCodeBuf[off++] = 0x00;
6143
6144 /* cmp tmp0, 0xffffffffffffffff. */
6145 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6146 pbCodeBuf[off++] = 0x83;
6147 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6148 pbCodeBuf[off++] = 0xff;
6149
6150 /* je/jz +1 */
6151 pbCodeBuf[off++] = 0x74;
6152 pbCodeBuf[off++] = 0x01;
6153
6154 /* int3 */
6155 pbCodeBuf[off++] = 0xcc;
6156
6157 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6158 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6159 pbCodeBuf[off++] = X86_OP_REX_W
6160 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6161 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6162 pbCodeBuf[off++] = 0x0f;
6163 pbCodeBuf[off++] = 0x3a;
6164 pbCodeBuf[off++] = 0x16;
6165 pbCodeBuf[off++] = 0xeb;
6166 pbCodeBuf[off++] = 0x01;
6167
6168 /* cmp tmp0, 0xffffffffffffffff. */
6169 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6170 pbCodeBuf[off++] = 0x83;
6171 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6172 pbCodeBuf[off++] = 0xff;
6173
6174 /* je/jz +1 */
6175 pbCodeBuf[off++] = 0x74;
6176 pbCodeBuf[off++] = 0x01;
6177
6178 /* int3 */
6179 pbCodeBuf[off++] = 0xcc;
6180
6181 return off;
6182}
6183# endif
6184
6185
6186/**
6187 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6188 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6189 * instruction if that's not the case.
6190 *
6191 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6192 * Trashes EFLAGS on AMD64.
6193 */
6194DECL_HIDDEN_THROW(uint32_t)
6195iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6196 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6197{
6198 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6199 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6200 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6201 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6202 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6203 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6204 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6205 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6206 return off;
6207
6208# ifdef RT_ARCH_AMD64
6209 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6210 {
6211 /* movdqa vectmp0, idxSimdReg */
6212 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6213
6214 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6215
6216 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6217 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6218 }
6219
6220 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6221 {
6222 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6223 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6224
6225 /* vextracti128 vectmp0, idxSimdReg, 1 */
6226 pbCodeBuf[off++] = X86_OP_VEX3;
6227 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6228 | X86_OP_VEX3_BYTE1_X
6229 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6230 | 0x03; /* Opcode map */
6231 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6232 pbCodeBuf[off++] = 0x39;
6233 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6234 pbCodeBuf[off++] = 0x01;
6235
6236 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6237 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6238 }
6239# elif defined(RT_ARCH_ARM64)
6240 /* mov vectmp0, [gstreg] */
6241 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6242
6243 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6244 {
6245 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6246 /* eor vectmp0, vectmp0, idxSimdReg */
6247 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6248 /* cnt vectmp0, vectmp0 */
6249 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6250 /* umov tmp0, vectmp0.D[0] */
6251 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6252 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6253 /* cbz tmp0, +1 */
6254 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6255 /* brk #0x1000+enmGstSimdReg */
6256 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6257 }
6258
6259 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6260 {
6261 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6262 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg + 1 */
6263 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6264 /* cnt vectmp0 + 1, vectmp0 + 1 */
6265 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6266 /* umov tmp0, (vectmp0 + 1).D[0] */
6267 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6268 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6269 /* cbz tmp0, +1 */
6270 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6271 /* brk #0x1000+enmGstSimdReg */
6272 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6273 }
6274
6275# else
6276# error "Port me!"
6277# endif
6278
6279 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6280 return off;
6281}
6282# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6283
6284
6285/**
6286 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6287 * important bits.
6288 *
6289 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6290 * Trashes EFLAGS on AMD64.
6291 */
6292DECL_HIDDEN_THROW(uint32_t)
6293iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6294{
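/* Load IEMCPU::fExec, mask it down to the key bits and compare it with the (equally masked) expected value; the code below traps if they differ. */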
6295 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6296 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6297 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6298 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6299
6300#ifdef RT_ARCH_AMD64
6301 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6302
6303 /* je/jz +1 */
6304 pbCodeBuf[off++] = 0x74;
6305 pbCodeBuf[off++] = 0x01;
6306
6307 /* int3 */
6308 pbCodeBuf[off++] = 0xcc;
6309
6310# elif defined(RT_ARCH_ARM64)
6311 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6312
6313 /* b.eq +1 */
6314 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6315 /* brk #0x2000 */
6316 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6317
6318# else
6319# error "Port me!"
6320# endif
6321 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6322
6323 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6324 return off;
6325}
6326
6327#endif /* VBOX_STRICT */
6328
6329
6330#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6331/**
6332 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6333 */
6334DECL_HIDDEN_THROW(uint32_t)
6335iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6336{
6337 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6338
6339 fEflNeeded &= X86_EFL_STATUS_BITS;
6340 if (fEflNeeded)
6341 {
6342# ifdef RT_ARCH_AMD64
6343 /* test dword [pVCpu + offVCpu], imm32 */
6344 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6345 if (fEflNeeded <= 0xff)
6346 {
6347 pCodeBuf[off++] = 0xf6;
6348 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6349 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6350 }
6351 else
6352 {
6353 pCodeBuf[off++] = 0xf7;
6354 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6355 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6356 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6357 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6358 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6359 }
6360 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6361
6362# else
6363 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6364 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6365 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6366# ifdef RT_ARCH_ARM64
6367 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6368 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6369# else
6370# error "Port me!"
6371# endif
6372 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6373# endif
6374 }
6375 return off;
6376}
6377#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6378
6379
6380/**
6381 * Emits code for checking the return code of a call and rcPassUp, returning
6382 * from the code if either are non-zero.
6383 */
6384DECL_HIDDEN_THROW(uint32_t)
6385iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6386{
6387#ifdef RT_ARCH_AMD64
6388 /*
6389 * AMD64: eax = call status code.
6390 */
6391
6392 /* edx = rcPassUp */
6393 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6394# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6395 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6396# endif
6397
6398 /* edx = eax | rcPassUp */
6399 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6400 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6401 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6403
6404 /* Jump to non-zero status return path. */
6405 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6406
6407 /* done. */
6408
6409#elif RT_ARCH_ARM64
6410 /*
6411 * ARM64: w0 = call status code.
6412 */
6413# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6414 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6415# endif
6416 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6417
6418 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6419
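/* w4 = w0 (call status) | w3 (rcPassUp); the CBNZ below branches to the NonZeroRetOrPassUp label if the result is non-zero. */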
6420 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6421
6422 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6423 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6424 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6425
6426#else
6427# error "port me"
6428#endif
6429 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6430 RT_NOREF_PV(idxInstr);
6431 return off;
6432}
6433
6434
6435/**
6436 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6437 * raising a \#GP(0) if it isn't.
6438 *
6439 * @returns New code buffer offset, UINT32_MAX on failure.
6440 * @param pReNative The native recompile state.
6441 * @param off The code buffer offset.
6442 * @param idxAddrReg The host register with the address to check.
6443 * @param idxInstr The current instruction.
6444 */
6445DECL_HIDDEN_THROW(uint32_t)
6446iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6447{
6448 /*
6449 * Make sure we don't have any outstanding guest register writes as we may
6450 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6451 */
6452 off = iemNativeRegFlushPendingWrites(pReNative, off);
6453
6454#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6455 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6456#else
6457 RT_NOREF(idxInstr);
6458#endif
6459
6460#ifdef RT_ARCH_AMD64
6461 /*
6462 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6463 * return raisexcpt();
6464 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6465 */
6466 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6467
6468 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6469 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6470 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6471 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6472 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6473
6474 iemNativeRegFreeTmp(pReNative, iTmpReg);
6475
6476#elif defined(RT_ARCH_ARM64)
6477 /*
6478 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6479 * return raisexcpt();
6480 * ----
6481 * mov x1, 0x800000000000
6482 * add x1, x0, x1
6483 * cmp xzr, x1, lsr 48
6484 * b.ne .Lraisexcpt
6485 */
6486 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6487
6488 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6489 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6490 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6491 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6492
6493 iemNativeRegFreeTmp(pReNative, iTmpReg);
6494
6495#else
6496# error "Port me"
6497#endif
6498 return off;
6499}
6500
6501
6502/**
6503 * Emits code to check that the content of @a idxAddrReg is within the limit
6504 * of CS, raising a \#GP(0) if it isn't.
6505 *
6506 * @returns New code buffer offset; throws VBox status code on error.
6507 * @param pReNative The native recompile state.
6508 * @param off The code buffer offset.
6509 * @param idxAddrReg The host register (32-bit) with the address to
6510 * check.
6511 * @param idxInstr The current instruction.
6512 */
6513DECL_HIDDEN_THROW(uint32_t)
6514iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6515 uint8_t idxAddrReg, uint8_t idxInstr)
6516{
6517 /*
6518 * Make sure we don't have any outstanding guest register writes as we may
6519 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6520 */
6521 off = iemNativeRegFlushPendingWrites(pReNative, off);
6522
6523#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6524 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6525#else
6526 RT_NOREF(idxInstr);
6527#endif
6528
6529 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6530 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6531 kIemNativeGstRegUse_ReadOnly);
6532
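/* Unsigned compare against CS.limit; 'above' means the address is outside the limit, so we raise #GP(0). */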
6533 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6534 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6535
6536 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6537 return off;
6538}
6539
6540
6541/**
6542 * Emits a call to a CImpl function or something similar.
6543 */
6544DECL_HIDDEN_THROW(uint32_t)
6545iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6546 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6547{
6548 /* Writeback everything. */
6549 off = iemNativeRegFlushPendingWrites(pReNative, off);
6550
6551 /*
6552 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6553 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6554 */
6555 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6556 fGstShwFlush
6557 | RT_BIT_64(kIemNativeGstReg_Pc)
6558 | RT_BIT_64(kIemNativeGstReg_EFlags));
6559 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6560
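/* Make room for the call: move or spill whatever currently occupies the argument and volatile registers. */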
6561 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6562
6563 /*
6564 * Load the parameters.
6565 */
6566#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6567 /* Special-case the hidden VBOXSTRICTRC pointer, which occupies the first argument register. */
6568 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6569 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6570 if (cAddParams > 0)
6571 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6572 if (cAddParams > 1)
6573 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6574 if (cAddParams > 2)
6575 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6576 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6577
6578#else
6579 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6580 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6581 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6582 if (cAddParams > 0)
6583 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6584 if (cAddParams > 1)
6585 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6586 if (cAddParams > 2)
6587# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6588 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6589# else
6590 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6591# endif
6592#endif
6593
6594 /*
6595 * Make the call.
6596 */
6597 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6598
6599#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6600 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6601#endif
6602
6603 /*
6604 * Check the status code.
6605 */
6606 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6607}
6608
6609
6610/**
6611 * Emits a call to a threaded worker function.
6612 */
6613DECL_HIDDEN_THROW(uint32_t)
6614iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6615{
6616 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6617
6618 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6619 off = iemNativeRegFlushPendingWrites(pReNative, off);
6620
6621 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6622 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6623
6624#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6625 /* The threaded function may throw / long jmp, so set current instruction
6626 number if we're counting. */
6627 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6628#endif
6629
6630 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6631
6632#ifdef RT_ARCH_AMD64
6633 /* Load the parameters and emit the call. */
6634# ifdef RT_OS_WINDOWS
6635# ifndef VBOXSTRICTRC_STRICT_ENABLED
6636 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6637 if (cParams > 0)
6638 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6639 if (cParams > 1)
6640 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6641 if (cParams > 2)
6642 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6643# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6644 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6645 if (cParams > 0)
6646 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6647 if (cParams > 1)
6648 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6649 if (cParams > 2)
6650 {
6651 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6652 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6653 }
6654 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6655# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6656# else
6657 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6658 if (cParams > 0)
6659 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6660 if (cParams > 1)
6661 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6662 if (cParams > 2)
6663 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6664# endif
6665
6666 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6667
6668# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6669 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6670# endif
6671
6672#elif RT_ARCH_ARM64
6673 /*
6674 * ARM64: Load the parameters into the argument registers and make the call.
6675 */
6676 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6677 if (cParams > 0)
6678 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6679 if (cParams > 1)
6680 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6681 if (cParams > 2)
6682 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6683
6684 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6685
6686#else
6687# error "port me"
6688#endif
6689
6690 /*
6691 * Check the status code.
6692 */
6693 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6694
6695 return off;
6696}
6697
6698#ifdef VBOX_WITH_STATISTICS
6699/**
6700 * Emits code to update the thread call statistics.
6701 */
6702DECL_INLINE_THROW(uint32_t)
6703iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6704{
6705 /*
6706 * Update threaded function stats.
6707 */
6708 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6709 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6710# if defined(RT_ARCH_ARM64)
6711 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6712 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6713 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6714 iemNativeRegFreeTmp(pReNative, idxTmp1);
6715 iemNativeRegFreeTmp(pReNative, idxTmp2);
6716# else
6717 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6718# endif
6719 return off;
6720}
6721#endif /* VBOX_WITH_STATISTICS */
6722
6723
6724/**
6725 * Emits the code at the ReturnWithFlags label (returns
6726 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6727 */
6728static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6729{
6730 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6731 if (idxLabel != UINT32_MAX)
6732 {
6733 iemNativeLabelDefine(pReNative, idxLabel, off);
6734
6735 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6736
6737 /* jump back to the return sequence. */
6738 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6739 }
6740 return off;
6741}
6742
6743
6744/**
6745 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6746 */
6747static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6748{
6749 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6750 if (idxLabel != UINT32_MAX)
6751 {
6752 iemNativeLabelDefine(pReNative, idxLabel, off);
6753
6754 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6755
6756 /* jump back to the return sequence. */
6757 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6758 }
6759 return off;
6760}
6761
6762
6763/**
6764 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6765 */
6766static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6767{
6768 /*
6769 * Generate the rc + rcPassUp fiddling code if needed.
6770 */
6771 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6772 if (idxLabel != UINT32_MAX)
6773 {
6774 iemNativeLabelDefine(pReNative, idxLabel, off);
6775
6776 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6777#ifdef RT_ARCH_AMD64
6778# ifdef RT_OS_WINDOWS
6779# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6780 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6781# endif
6782 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6783 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6784# else
6785 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6786 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6787# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6788 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6789# endif
6790# endif
6791# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6792 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6793# endif
6794
6795#else
6796 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6797 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6798 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6799#endif
6800
6801 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6802 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6803 }
6804 return off;
6805}
6806
6807
6808/**
6809 * Emits a standard epilog.
6810 */
6811static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6812{
6813 *pidxReturnLabel = UINT32_MAX;
6814
6815 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6816 off = iemNativeRegFlushPendingWrites(pReNative, off);
6817
6818 /*
6819 * Successful return, so clear the return register (eax, w0).
6820 */
6821 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6822
6823 /*
6824 * Define label for common return point.
6825 */
6826 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6827 *pidxReturnLabel = idxReturn;
6828
6829 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6830
6831 /*
6832 * Restore registers and return.
6833 */
6834#ifdef RT_ARCH_AMD64
6835 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6836
6837 /* Reposition rsp at the r15 restore point. */
6838 pbCodeBuf[off++] = X86_OP_REX_W;
6839 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6840 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6841 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6842
6843 /* Pop non-volatile registers and return */
6844 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6845 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6846 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6847 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6848 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6849 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6850 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6851 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6852# ifdef RT_OS_WINDOWS
6853 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6854 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6855# endif
6856 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6857 pbCodeBuf[off++] = 0xc9; /* leave */
6858 pbCodeBuf[off++] = 0xc3; /* ret */
6859 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6860
6861#elif RT_ARCH_ARM64
6862 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6863
6864 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
6865 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6866 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6867 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6868 IEMNATIVE_FRAME_VAR_SIZE / 8);
6869 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6870 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6871 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6872 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6873 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6874 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6875 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6876 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6877 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6878 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6879 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6880 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6881
6882 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6883 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6884 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6885 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6886
6887 /* retab / ret */
6888# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6889 if (1)
6890 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6891 else
6892# endif
6893 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6894
6895#else
6896# error "port me"
6897#endif
6898 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6899
6900 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6901}
6902
6903
6904/**
6905 * Emits a standard prolog.
6906 */
6907static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6908{
6909#ifdef RT_ARCH_AMD64
6910 /*
6911 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6912 * reserving 64 bytes for stack variables plus 4 non-register argument
6913 * slots. Fixed register assignment: xBX = pVCpu;
6914 *
6915 * Since we always do the same register spilling, we can use the same
6916 * unwind description for all the code.
6917 */
6918 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6919 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6920 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6921 pbCodeBuf[off++] = 0x8b;
6922 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6923 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6924 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6925# ifdef RT_OS_WINDOWS
6926 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6927 pbCodeBuf[off++] = 0x8b;
6928 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6929 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6930 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6931# else
6932 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6933 pbCodeBuf[off++] = 0x8b;
6934 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6935# endif
6936 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6937 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6938 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6939 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6940 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6941 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6942 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6943 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6944
6945# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6946 /* Save the frame pointer. */
6947 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6948# endif
6949
6950 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6951 X86_GREG_xSP,
6952 IEMNATIVE_FRAME_ALIGN_SIZE
6953 + IEMNATIVE_FRAME_VAR_SIZE
6954 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6955 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6956 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6957 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6958 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6959
6960#elif RT_ARCH_ARM64
6961 /*
6962 * We set up a stack frame exactly like on x86, only we have to push the
6963 * return address ourselves here. We save all non-volatile registers.
6964 */
6965 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6966
6967# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been
6968 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6969 * definitely the dwarf stepping code, but until found it's very tedious to figure out whether it's
6970 * in any way conditional, so just emit this instruction now and hope for the best... */
6971 /* pacibsp */
6972 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6973# endif
6974
6975 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6976 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6977 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6978 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6979 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6980 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6981 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6982 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6983 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6984 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6985 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6986 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6988 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6989 /* Save the BP and LR (ret address) registers at the top of the frame. */
6990 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6991 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6992 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
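    /* Illustration of the register-save area just set up (given the compile-time
       assumption asserted above, IEMNATIVE_FRAME_SAVE_REG_SIZE is 12*8 = 96 bytes):
            sp+80: bp, lr      <- BP will be pointed here by the 'add' below.
            sp+64: x27, x28
            sp+48: x25, x26
            sp+32: x23, x24
            sp+16: x21, x22
            sp+ 0: x19, x20 */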
6993 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6994 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6995 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6996
6997 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6998 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6999
7000 /* mov r28, r0 */
7001 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7002 /* mov r27, r1 */
7003 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7004
7005# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7006 /* Save the frame pointer. */
7007 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7008 ARMV8_A64_REG_X2);
7009# endif
7010
7011#else
7012# error "port me"
7013#endif
7014 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7015 return off;
7016}
7017
7018
7019/*********************************************************************************************************************************
7020* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7021*********************************************************************************************************************************/
7022
7023/**
7024 * Internal work that allocates a variable with kind set to
7025 * kIemNativeVarKind_Invalid and no current stack allocation.
7026 *
7027 * The kind will either be set by the caller or later when the variable is first
7028 * assigned a value.
7029 *
7030 * @returns Unpacked index.
7031 * @internal
7032 */
7033static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7034{
7035 Assert(cbType > 0 && cbType <= 64);
7036 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7037 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7038 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7039 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7040 pReNative->Core.aVars[idxVar].cbVar = cbType;
7041 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7042 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7043 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7044 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7045 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7046 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7047 pReNative->Core.aVars[idxVar].u.uValue = 0;
7048#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7049 pReNative->Core.aVars[idxVar].fSimdReg = false;
7050#endif
7051 return idxVar;
7052}
7053
7054
7055/**
7056 * Internal work that allocates an argument variable w/o setting enmKind.
7057 *
7058 * @returns Unpacked index.
7059 * @internal
7060 */
7061static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7062{
7063 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7064 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7065 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7066
7067 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7068 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7069 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7070 return idxVar;
7071}
7072
7073
7074/**
7075 * Gets the stack slot for a stack variable, allocating one if necessary.
7076 *
7077 * Calling this function implies that the stack slot will contain a valid
7078 * variable value. The caller deals with any register currently assigned to the
7079 * variable, typically by spilling it into the stack slot.
7080 *
7081 * @returns The stack slot number.
7082 * @param pReNative The recompiler state.
7083 * @param idxVar The variable.
7084 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
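 *
 * Typical usage when spilling a variable register (an illustrative sketch based
 * on the callers further down in this file, not a fixed recipe):
 * @code
 *      uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
 *      off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
 * @endcode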
7085 */
7086DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7087{
7088 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7089 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7090 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7091
7092 /* Already got a slot? */
7093 uint8_t const idxStackSlot = pVar->idxStackSlot;
7094 if (idxStackSlot != UINT8_MAX)
7095 {
7096 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7097 return idxStackSlot;
7098 }
7099
7100 /*
7101 * A single slot is easy to allocate.
7102 * Allocate them from the top end, closest to BP, to reduce the displacement.
7103 */
7104 if (pVar->cbVar <= sizeof(uint64_t))
7105 {
7106 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7107 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7108 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7109 pVar->idxStackSlot = (uint8_t)iSlot;
7110 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7111 return (uint8_t)iSlot;
7112 }
7113
7114 /*
7115 * We need more than one stack slot.
7116 *
7117 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7118 */
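    /* Worked example: a 32 byte variable (e.g. an RTUINT256U) gives
       fBitAlignMask = 3 and fBitAllocMask = 0xf, so the loop below scans
       bmStack from the top for four consecutive free 8-byte slots starting
       at a slot index that is a multiple of four. */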
7119 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7120 Assert(pVar->cbVar <= 64);
7121 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7122 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7123 uint32_t bmStack = pReNative->Core.bmStack;
7124 while (bmStack != UINT32_MAX)
7125 {
7126 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7127 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7128 iSlot = (iSlot - 1) & ~fBitAlignMask;
7129 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7130 {
7131 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7132 pVar->idxStackSlot = (uint8_t)iSlot;
7133 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7134 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7135 return (uint8_t)iSlot;
7136 }
7137
7138 bmStack |= (fBitAllocMask << iSlot);
7139 }
7140 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7141}
7142
7143
7144/**
7145 * Changes the variable to a stack variable.
7146 *
7147 * Currently this is only possible to do the first time the variable is used;
7148 * switching later can be implemented but hasn't been done.
7149 *
7150 * @param pReNative The recompiler state.
7151 * @param idxVar The variable.
7152 * @throws VERR_IEM_VAR_IPE_2
7153 */
7154DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7155{
7156 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7157 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7158 if (pVar->enmKind != kIemNativeVarKind_Stack)
7159 {
7160 /* We could in theory transition from immediate to stack as well, but it
7161 would involve the caller doing work storing the value on the stack. So,
7162 till that's required we only allow transition from invalid. */
7163 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7164 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7165 pVar->enmKind = kIemNativeVarKind_Stack;
7166
7167 /* Note! We don't allocate a stack slot here, that's only done when a
7168 slot is actually needed to hold a variable value. */
7169 }
7170}
7171
7172
7173/**
7174 * Sets the variable to a constant (immediate) value.
7175 *
7176 * This does not require stack storage as we know the value and can always
7177 * reload it, unless of course it's referenced.
7178 *
7179 * @param pReNative The recompiler state.
7180 * @param idxVar The variable.
7181 * @param uValue The immediate value.
7182 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7183 */
7184DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7185{
7186 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7187 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7188 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7189 {
7190 /* Only simple transitions for now. */
7191 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7192 pVar->enmKind = kIemNativeVarKind_Immediate;
7193 }
7194 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7195
7196 pVar->u.uValue = uValue;
7197 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7198 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7199 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7200}
7201
7202
7203/**
7204 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7205 *
7206 * This does not require stack storage as we know the value and can always
7207 * reload it. Loading is postponed till needed.
7208 *
7209 * @param pReNative The recompiler state.
7210 * @param idxVar The variable. Unpacked.
7211 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7212 *
7213 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7214 * @internal
7215 */
7216static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7217{
7218 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7219 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7220
7221 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7222 {
7223 /* Only simple transitions for now. */
7224 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7225 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7226 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7227 }
7228 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7229
7230 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7231
7232 /* Update the other variable, ensure it's a stack variable. */
7233 /** @todo handle variables with const values... that'll go boom now. */
7234 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7235 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7236}
7237
7238
7239/**
7240 * Sets the variable to a reference (pointer) to a guest register reference.
7241 *
7242 * This does not require stack storage as we know the value and can always
7243 * reload it. Loading is postponed till needed.
7244 *
7245 * @param pReNative The recompiler state.
7246 * @param idxVar The variable.
7247 * @param   enmRegClass     The class of guest registers to reference.
7248 * @param idxReg The register within @a enmRegClass to reference.
7249 *
7250 * @throws VERR_IEM_VAR_IPE_2
7251 */
7252DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7253 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7254{
7255 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7256 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7257
7258 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7259 {
7260 /* Only simple transitions for now. */
7261 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7262 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7263 }
7264 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7265
7266 pVar->u.GstRegRef.enmClass = enmRegClass;
7267 pVar->u.GstRegRef.idx = idxReg;
7268}
7269
7270
7271DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7272{
7273 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7274}
7275
7276
7277DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7278{
7279 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7280
7281 /* Since we're using a generic uint64_t value type, we must truncate it if
7282       the variable is smaller, otherwise we may end up with too large a value when
7283       scaling up an imm8 w/ sign-extension.
7284
7285 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7286       in the bios, bx=1) when running on arm, because clang expects 16-bit
7287 register parameters to have bits 16 and up set to zero. Instead of
7288 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7289 CF value in the result. */
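    /* Illustration: cbType=2 with uValue=UINT64_C(0xffffffffffffffff) (a
       sign-extended 16-bit immediate) is masked down to 0xffff below. */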
7290 switch (cbType)
7291 {
7292 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7293 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7294 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7295 }
7296 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7297 return idxVar;
7298}
7299
7300
7301DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7302{
7303 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7304 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7305 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7306 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7307 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7308 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7309
7310 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7311 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7312 return idxArgVar;
7313}
7314
7315
7316DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7317{
7318 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7319 /* Don't set to stack now, leave that to the first use as for instance
7320 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7321 return idxVar;
7322}
7323
7324
7325DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7326{
7327 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7328
7329 /* Since we're using a generic uint64_t value type, we must truncate it if
7330       the variable is smaller, otherwise we may end up with too large a value when
7331       scaling up an imm8 w/ sign-extension. */
7332 switch (cbType)
7333 {
7334 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7335 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7336 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7337 }
7338 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7339 return idxVar;
7340}
7341
7342
7343/**
7344 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7345 * fixed till we call iemNativeVarRegisterRelease.
7346 *
7347 * @returns The host register number.
7348 * @param pReNative The recompiler state.
7349 * @param idxVar The variable.
7350 * @param poff Pointer to the instruction buffer offset.
7351 * In case a register needs to be freed up or the value
7352 * loaded off the stack.
7353 * @param fInitialized Set if the variable must already have been initialized.
7354 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7355 * the case.
7356 * @param idxRegPref Preferred register number or UINT8_MAX.
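 *
 * Typical usage (an illustrative sketch only; the actual emitting done while
 * holding the register is up to the caller):
 * @code
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
 *      // ... emit code that reads/writes idxVarReg ...
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 * @endcode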
7357 */
7358DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7359 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7360{
7361 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7362 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7363 Assert(pVar->cbVar <= 8);
7364 Assert(!pVar->fRegAcquired);
7365
7366 uint8_t idxReg = pVar->idxReg;
7367 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7368 {
7369 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7370 && pVar->enmKind < kIemNativeVarKind_End);
7371 pVar->fRegAcquired = true;
7372 return idxReg;
7373 }
7374
7375 /*
7376 * If the kind of variable has not yet been set, default to 'stack'.
7377 */
7378 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7379 && pVar->enmKind < kIemNativeVarKind_End);
7380 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7381 iemNativeVarSetKindToStack(pReNative, idxVar);
7382
7383 /*
7384     * We have to allocate a register for the variable, even if it's a stack one,
7385     * as we don't know if there are modifications being made to it before it's
7386     * finalized (todo: analyze and insert hints about that?).
7387     *
7388     * If we can, we try to get the correct register for argument variables. This
7389     * assumes that most argument variables are fetched as close as possible
7390     * to the actual call, so that there aren't any interfering hidden calls
7391     * (memory accesses, etc) in between.
7392     *
7393     * If we cannot, or it's a plain (non-argument) variable, we make sure no argument
7394     * registers that will be used by this MC block are allocated here, and we always
7395     * prefer non-volatile registers to avoid needing to spill stuff for internal
7396     * calls.
7397 */
7398 /** @todo Detect too early argument value fetches and warn about hidden
7399 * calls causing less optimal code to be generated in the python script. */
7400
7401 uint8_t const uArgNo = pVar->uArgNo;
7402 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7403 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7404 {
7405 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7406 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7407 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7408 }
7409 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7410 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7411 {
7412 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7413 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7414 & ~pReNative->Core.bmHstRegsWithGstShadow
7415 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7416 & fNotArgsMask;
7417 if (fRegs)
7418 {
7419 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7420 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7421 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7422 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7423 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7424 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7425 }
7426 else
7427 {
7428 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7429 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7430 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7431 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7432 }
7433 }
7434 else
7435 {
7436 idxReg = idxRegPref;
7437 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7438 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7439 }
7440 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7441 pVar->idxReg = idxReg;
7442
7443#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7444 pVar->fSimdReg = false;
7445#endif
7446
7447 /*
7448 * Load it off the stack if we've got a stack slot.
7449 */
7450 uint8_t const idxStackSlot = pVar->idxStackSlot;
7451 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7452 {
7453 Assert(fInitialized);
7454 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7455 switch (pVar->cbVar)
7456 {
7457 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7458 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7459 case 3: AssertFailed(); RT_FALL_THRU();
7460 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7461 default: AssertFailed(); RT_FALL_THRU();
7462 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7463 }
7464 }
7465 else
7466 {
7467 Assert(idxStackSlot == UINT8_MAX);
7468 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7469 }
7470 pVar->fRegAcquired = true;
7471 return idxReg;
7472}
7473
7474
7475#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7476/**
7477 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7478 * fixed till we call iemNativeVarRegisterRelease.
7479 *
7480 * @returns The host register number.
7481 * @param pReNative The recompiler state.
7482 * @param idxVar The variable.
7483 * @param poff Pointer to the instruction buffer offset.
7484 * In case a register needs to be freed up or the value
7485 * loaded off the stack.
7486 * @param fInitialized Set if the variable must already have been initialized.
7487 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7488 * the case.
7489 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7490 */
7491DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7492 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7493{
7494 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7495 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7496 Assert( pVar->cbVar == sizeof(RTUINT128U)
7497 || pVar->cbVar == sizeof(RTUINT256U));
7498 Assert(!pVar->fRegAcquired);
7499
7500 uint8_t idxReg = pVar->idxReg;
7501 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7502 {
7503 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7504 && pVar->enmKind < kIemNativeVarKind_End);
7505 pVar->fRegAcquired = true;
7506 return idxReg;
7507 }
7508
7509 /*
7510 * If the kind of variable has not yet been set, default to 'stack'.
7511 */
7512 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7513 && pVar->enmKind < kIemNativeVarKind_End);
7514 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7515 iemNativeVarSetKindToStack(pReNative, idxVar);
7516
7517 /*
7518     * We have to allocate a register for the variable, even if it's a stack one,
7519     * as we don't know if there are modifications being made to it before it's
7520     * finalized (todo: analyze and insert hints about that?).
7521     *
7522     * If we can, we try to get the correct register for argument variables. This
7523     * assumes that most argument variables are fetched as close as possible
7524     * to the actual call, so that there aren't any interfering hidden calls
7525     * (memory accesses, etc) in between.
7526     *
7527     * If we cannot, or it's a plain (non-argument) variable, we make sure no argument
7528     * registers that will be used by this MC block are allocated here, and we always
7529     * prefer non-volatile registers to avoid needing to spill stuff for internal
7530     * calls.
7531 */
7532 /** @todo Detect too early argument value fetches and warn about hidden
7533 * calls causing less optimal code to be generated in the python script. */
7534
7535 uint8_t const uArgNo = pVar->uArgNo;
7536 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7537
7538     /* SIMD is a bit simpler for now because there is no support for arguments. */
7539 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7540 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7541 {
7542 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7543 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7544 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7545 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7546 & fNotArgsMask;
7547 if (fRegs)
7548 {
7549 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7550 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7551 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7552 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7553 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7554 }
7555 else
7556 {
7557 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7558 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7559 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7560 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7561 }
7562 }
7563 else
7564 {
7565 idxReg = idxRegPref;
7566 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7567 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7568 }
7569 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7570
7571 pVar->fSimdReg = true;
7572 pVar->idxReg = idxReg;
7573
7574 /*
7575 * Load it off the stack if we've got a stack slot.
7576 */
7577 uint8_t const idxStackSlot = pVar->idxStackSlot;
7578 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7579 {
7580 Assert(fInitialized);
7581 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7582 switch (pVar->cbVar)
7583 {
7584 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7585 default: AssertFailed(); RT_FALL_THRU();
7586 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7587 }
7588 }
7589 else
7590 {
7591 Assert(idxStackSlot == UINT8_MAX);
7592 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7593 }
7594 pVar->fRegAcquired = true;
7595 return idxReg;
7596}
7597#endif
7598
7599
7600/**
7601 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7602 * guest register.
7603 *
7604 * This function makes sure there is a register for it and sets it to be the
7605 * current shadow copy of @a enmGstReg.
7606 *
7607 * @returns The host register number.
7608 * @param pReNative The recompiler state.
7609 * @param idxVar The variable.
7610 * @param enmGstReg The guest register this variable will be written to
7611 * after this call.
7612 * @param poff Pointer to the instruction buffer offset.
7613 * In case a register needs to be freed up or if the
7614 * variable content needs to be loaded off the stack.
7615 *
7616 * @note We DO NOT expect @a idxVar to be an argument variable,
7617 *       because this function can only be used in the commit stage of an
7618 *       instruction.
7619 */
7620DECL_HIDDEN_THROW(uint8_t)
7621iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7622{
7623 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7624 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7625 Assert(!pVar->fRegAcquired);
7626 AssertMsgStmt( pVar->cbVar <= 8
7627 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7628 || pVar->enmKind == kIemNativeVarKind_Stack),
7629 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7630 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7631 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7632
7633 /*
7634 * This shouldn't ever be used for arguments, unless it's in a weird else
7635 * branch that doesn't do any calling and even then it's questionable.
7636 *
7637 * However, in case someone writes crazy wrong MC code and does register
7638 * updates before making calls, just use the regular register allocator to
7639 * ensure we get a register suitable for the intended argument number.
7640 */
7641 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7642
7643 /*
7644 * If there is already a register for the variable, we transfer/set the
7645 * guest shadow copy assignment to it.
7646 */
7647 uint8_t idxReg = pVar->idxReg;
7648 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7649 {
7650 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7651 {
7652 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7653 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7654 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7655 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7656 }
7657 else
7658 {
7659 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7660 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7661 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7662 }
7663 /** @todo figure this one out. We need some way of making sure the register isn't
7664 * modified after this point, just in case we start writing crappy MC code. */
7665 pVar->enmGstReg = enmGstReg;
7666 pVar->fRegAcquired = true;
7667 return idxReg;
7668 }
7669 Assert(pVar->uArgNo == UINT8_MAX);
7670
7671 /*
7672     * Because this is supposed to be the commit stage, we just tag along with the
7673 * temporary register allocator and upgrade it to a variable register.
7674 */
7675 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7676 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7677 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7678 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7679 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7680 pVar->idxReg = idxReg;
7681
7682 /*
7683 * Now we need to load the register value.
7684 */
7685 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7686 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7687 else
7688 {
7689 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7690 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7691 switch (pVar->cbVar)
7692 {
7693 case sizeof(uint64_t):
7694 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7695 break;
7696 case sizeof(uint32_t):
7697 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7698 break;
7699 case sizeof(uint16_t):
7700 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7701 break;
7702 case sizeof(uint8_t):
7703 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7704 break;
7705 default:
7706 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7707 }
7708 }
7709
7710 pVar->fRegAcquired = true;
7711 return idxReg;
7712}
7713
7714
7715/**
7716 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7717 *
7718 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7719 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7720 * requirement of flushing anything in volatile host registers when making a
7721 * call.
7722 *
7723 * @returns New @a off value.
7724 * @param pReNative The recompiler state.
7725 * @param off The code buffer position.
7726 * @param fHstRegsNotToSave Set of registers not to save & restore.
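 *
 * Rough usage pattern around a helper call (an illustrative sketch only):
 * @code
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      // ... load call arguments and emit the actual helper call ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 * @endcode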
7727 */
7728DECL_HIDDEN_THROW(uint32_t)
7729iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7730{
7731 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7732 if (fHstRegs)
7733 {
7734 do
7735 {
7736 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7737 fHstRegs &= ~RT_BIT_32(idxHstReg);
7738
7739 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7740 {
7741 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7742 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7743 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7744 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7745 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7746 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7747 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7748 {
7749 case kIemNativeVarKind_Stack:
7750 {
7751 /* Temporarily spill the variable register. */
7752 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7753 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7754 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7755 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7756 continue;
7757 }
7758
7759 case kIemNativeVarKind_Immediate:
7760 case kIemNativeVarKind_VarRef:
7761 case kIemNativeVarKind_GstRegRef:
7762 /* It is weird to have any of these loaded at this point. */
7763 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7764 continue;
7765
7766 case kIemNativeVarKind_End:
7767 case kIemNativeVarKind_Invalid:
7768 break;
7769 }
7770 AssertFailed();
7771 }
7772 else
7773 {
7774 /*
7775 * Allocate a temporary stack slot and spill the register to it.
7776 */
7777 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7778 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7779 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7780 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7781 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7782 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7783 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7784 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7785 }
7786 } while (fHstRegs);
7787 }
7788#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7789 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7790 if (fHstRegs)
7791 {
7792 do
7793 {
7794 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7795 fHstRegs &= ~RT_BIT_32(idxHstReg);
7796
7797 /*
7798             * Guest registers are flushed to CPUMCTX at the moment and don't need a stack slot
7799             * allocated, which would also be more difficult due to them spanning multiple stack
7800             * slots and coming in different sizes (besides, we only have a limited number of slots
7801             * at the moment). Fixed temporary registers don't need saving either.
7802 */
7803 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7804 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7805 continue;
7806
7807 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7808
7809 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7810 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7811 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7812 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7813 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7814 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7815 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7816 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7817 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7818 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7819 {
7820 case kIemNativeVarKind_Stack:
7821 {
7822 /* Temporarily spill the variable register. */
7823 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7824 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7825 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7826 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7827 if (cbVar == sizeof(RTUINT128U))
7828 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7829 else
7830 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7831 continue;
7832 }
7833
7834 case kIemNativeVarKind_Immediate:
7835 case kIemNativeVarKind_VarRef:
7836 case kIemNativeVarKind_GstRegRef:
7837 /* It is weird to have any of these loaded at this point. */
7838 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7839 continue;
7840
7841 case kIemNativeVarKind_End:
7842 case kIemNativeVarKind_Invalid:
7843 break;
7844 }
7845 AssertFailed();
7846 } while (fHstRegs);
7847 }
7848#endif
7849 return off;
7850}
7851
7852
7853/**
7854 * Emit code to restore volatile registers after a call to a helper.
7855 *
7856 * @returns New @a off value.
7857 * @param pReNative The recompiler state.
7858 * @param off The code buffer position.
7859 * @param fHstRegsNotToSave Set of registers not to save & restore.
7860 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7861 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7862 */
7863DECL_HIDDEN_THROW(uint32_t)
7864iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7865{
7866 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7867 if (fHstRegs)
7868 {
7869 do
7870 {
7871 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7872 fHstRegs &= ~RT_BIT_32(idxHstReg);
7873
7874 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7875 {
7876 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7877 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7878 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7879 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7880 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7881 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7882 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7883 {
7884 case kIemNativeVarKind_Stack:
7885 {
7886 /* Unspill the variable register. */
7887 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7888 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7889 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7890 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7891 continue;
7892 }
7893
7894 case kIemNativeVarKind_Immediate:
7895 case kIemNativeVarKind_VarRef:
7896 case kIemNativeVarKind_GstRegRef:
7897 /* It is weird to have any of these loaded at this point. */
7898 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7899 continue;
7900
7901 case kIemNativeVarKind_End:
7902 case kIemNativeVarKind_Invalid:
7903 break;
7904 }
7905 AssertFailed();
7906 }
7907 else
7908 {
7909 /*
7910 * Restore from temporary stack slot.
7911 */
7912 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7913 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7914 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7915 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7916
7917 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7918 }
7919 } while (fHstRegs);
7920 }
7921#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7922 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7923 if (fHstRegs)
7924 {
7925 do
7926 {
7927 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7928 fHstRegs &= ~RT_BIT_32(idxHstReg);
7929
7930 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7931 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7932 continue;
7933 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7934
7935 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7936 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7937 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7938 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7939 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7940 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7941 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7942 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7943 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7944 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7945 {
7946 case kIemNativeVarKind_Stack:
7947 {
7948 /* Unspill the variable register. */
7949 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7950 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7951 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7952 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7953
7954 if (cbVar == sizeof(RTUINT128U))
7955 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7956 else
7957 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7958 continue;
7959 }
7960
7961 case kIemNativeVarKind_Immediate:
7962 case kIemNativeVarKind_VarRef:
7963 case kIemNativeVarKind_GstRegRef:
7964 /* It is weird to have any of these loaded at this point. */
7965 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7966 continue;
7967
7968 case kIemNativeVarKind_End:
7969 case kIemNativeVarKind_Invalid:
7970 break;
7971 }
7972 AssertFailed();
7973 } while (fHstRegs);
7974 }
7975#endif
7976 return off;
7977}
7978
7979
7980/**
7981 * Worker that frees the stack slots for variable @a idxVar if any allocated.
7982 *
7983 * Worker that frees the stack slots for variable @a idxVar if any are allocated.
7984 *
7985 * ASSUMES that @a idxVar is valid and unpacked.
7986 */
7987DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7988{
7989 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7990 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7991 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7992 {
7993 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7994 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7995 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
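        /* E.g. cbVar=32 gives cSlots=4 and fAllocMask=0xf, mirroring the
           multi-slot allocation done by iemNativeVarGetStackSlot. */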
7996 Assert(cSlots > 0);
7997 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7998 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7999 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8000 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8001 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8002 }
8003 else
8004 Assert(idxStackSlot == UINT8_MAX);
8005}
8006
8007
8008/**
8009 * Worker that frees a single variable.
8010 *
8011 * ASSUMES that @a idxVar is valid and unpacked.
8012 */
8013DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8014{
8015 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8016 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8017 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8018
8019 /* Free the host register first if any assigned. */
8020 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8021#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8022 if ( idxHstReg != UINT8_MAX
8023 && pReNative->Core.aVars[idxVar].fSimdReg)
8024 {
8025 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8026 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8027 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8028 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8029 }
8030 else
8031#endif
8032 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8033 {
8034 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8035 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8036 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8037 }
8038
8039 /* Free argument mapping. */
8040 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8041 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8042 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8043
8044 /* Free the stack slots. */
8045 iemNativeVarFreeStackSlots(pReNative, idxVar);
8046
8047 /* Free the actual variable. */
8048 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8049 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8050}
8051
8052
8053/**
8054 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8055 */
8056DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8057{
8058 while (bmVars != 0)
8059 {
8060 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8061 bmVars &= ~RT_BIT_32(idxVar);
8062
8063#if 1 /** @todo optimize by simplifying this later... */
8064 iemNativeVarFreeOneWorker(pReNative, idxVar);
8065#else
8066 /* Only need to free the host register, the rest is done as bulk updates below. */
8067 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8068 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8069 {
8070 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8071 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8072 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8073 }
8074#endif
8075 }
8076#if 0 /** @todo optimize by simplifying this later... */
8077 pReNative->Core.bmVars = 0;
8078 pReNative->Core.bmStack = 0;
8079 pReNative->Core.u64ArgVars = UINT64_MAX;
8080#endif
8081}
8082
8083
8084
8085/*********************************************************************************************************************************
8086* Emitters for IEM_MC_CALL_CIMPL_XXX *
8087*********************************************************************************************************************************/
8088
8089/**
8090 * Emits code to load a reference to the given guest register into @a idxGprDst.
8091 */
8092DECL_HIDDEN_THROW(uint32_t)
8093iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8094 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8095{
8096#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8097    /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
8098#endif
8099
8100 /*
8101 * Get the offset relative to the CPUMCTX structure.
8102 */
8103 uint32_t offCpumCtx;
8104 switch (enmClass)
8105 {
8106 case kIemNativeGstRegRef_Gpr:
8107 Assert(idxRegInClass < 16);
8108 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8109 break;
8110
8111        case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
8112 Assert(idxRegInClass < 4);
8113 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8114 break;
8115
8116 case kIemNativeGstRegRef_EFlags:
8117 Assert(idxRegInClass == 0);
8118 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8119 break;
8120
8121 case kIemNativeGstRegRef_MxCsr:
8122 Assert(idxRegInClass == 0);
8123 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8124 break;
8125
8126 case kIemNativeGstRegRef_FpuReg:
8127 Assert(idxRegInClass < 8);
8128 AssertFailed(); /** @todo what kind of indexing? */
8129 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8130 break;
8131
8132 case kIemNativeGstRegRef_MReg:
8133 Assert(idxRegInClass < 8);
8134 AssertFailed(); /** @todo what kind of indexing? */
8135 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8136 break;
8137
8138 case kIemNativeGstRegRef_XReg:
8139 Assert(idxRegInClass < 16);
8140 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8141 break;
8142
8143 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8144 Assert(idxRegInClass == 0);
8145 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8146 break;
8147
8148 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8149 Assert(idxRegInClass == 0);
8150 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8151 break;
8152
8153 default:
8154 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8155 }
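    /* For example, enmClass=kIemNativeGstRegRef_Gpr with idxRegInClass=3 resolves to
       offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3]); the emitters below then add the
       CPUMCTX offset within VMCPUCC (AMD64) or use the fixed PCPUMCTX register (ARM64). */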
8156
8157 /*
8158 * Load the value into the destination register.
8159 */
8160#ifdef RT_ARCH_AMD64
8161 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8162
8163#elif defined(RT_ARCH_ARM64)
8164 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8165 Assert(offCpumCtx < 4096);
8166 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8167
8168#else
8169# error "Port me!"
8170#endif
8171
8172 return off;
8173}
8174
8175
8176/**
8177 * Common code for CIMPL and AIMPL calls.
8178 *
8179 * These are calls that use argument variables and such.  They should not be
8180 * confused with internal calls required to implement an MC operation,
8181 * like a TLB load and similar.
8182 *
8183 * Upon return all that is left to do is to load any hidden arguments and
8184 * perform the call. All argument variables are freed.
8185 *
8186 * @returns New code buffer offset; throws VBox status code on error.
8187 * @param pReNative The native recompile state.
8188 * @param off The code buffer offset.
8189 * @param   cArgs           The total number of arguments (includes hidden
8190 * count).
8191 * @param cHiddenArgs The number of hidden arguments. The hidden
8192 * arguments must not have any variable declared for
8193 * them, whereas all the regular arguments must
8194 * (tstIEMCheckMc ensures this).
8195 */
8196DECL_HIDDEN_THROW(uint32_t)
8197iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8198{
8199#ifdef VBOX_STRICT
8200 /*
8201 * Assert sanity.
8202 */
8203 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8204 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8205 for (unsigned i = 0; i < cHiddenArgs; i++)
8206 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8207 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8208 {
8209 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8210 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8211 }
8212 iemNativeRegAssertSanity(pReNative);
8213#endif
8214
8215 /* We don't know what the called function makes use of, so flush any pending register writes. */
8216 off = iemNativeRegFlushPendingWrites(pReNative, off);
8217
8218 /*
8219 * Before we do anything else, go over variables that are referenced and
8220 * make sure they are not in a register.
8221 */
8222 uint32_t bmVars = pReNative->Core.bmVars;
8223 if (bmVars)
8224 {
8225 do
8226 {
8227 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8228 bmVars &= ~RT_BIT_32(idxVar);
8229
8230 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8231 {
8232 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8233#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8234 if ( idxRegOld != UINT8_MAX
8235 && pReNative->Core.aVars[idxVar].fSimdReg)
8236 {
8237 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8238 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8239
8240 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8241 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8242 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8243 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8244 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8245 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8246 else
8247 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8248
8249 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8250 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8251
8252 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8253 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8254 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8255 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8256 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8257 }
8258 else
8259#endif
8260 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8261 {
8262 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8263 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8264 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8265 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8266 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8267
8268 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8269 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8270 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8271 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8272 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8273 }
8274 }
8275 } while (bmVars != 0);
8276#if 0 //def VBOX_STRICT
8277 iemNativeRegAssertSanity(pReNative);
8278#endif
8279 }
8280
8281 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8282
8283 /*
8284 * First, go over the host registers that will be used for arguments and make
8285 * sure they either hold the desired argument or are free.
8286 */
8287 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8288 {
8289 for (uint32_t i = 0; i < cRegArgs; i++)
8290 {
8291 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8292 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8293 {
8294 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8295 {
8296 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8297 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8298 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8299 Assert(pVar->idxReg == idxArgReg);
8300 uint8_t const uArgNo = pVar->uArgNo;
8301 if (uArgNo == i)
8302                    { /* perfect */ }
8303 /* The variable allocator logic should make sure this is impossible,
8304 except for when the return register is used as a parameter (ARM,
8305 but not x86). */
8306#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8307 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8308 {
8309# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8310# error "Implement this"
8311# endif
8312 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8313 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8314 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8315 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8316 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8317 }
8318#endif
8319 else
8320 {
8321 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8322
8323 if (pVar->enmKind == kIemNativeVarKind_Stack)
8324 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8325 else
8326 {
8327 /* just free it, can be reloaded if used again */
8328 pVar->idxReg = UINT8_MAX;
8329 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8330 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8331 }
8332 }
8333 }
8334 else
8335 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8336 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8337 }
8338 }
8339#if 0 //def VBOX_STRICT
8340 iemNativeRegAssertSanity(pReNative);
8341#endif
8342 }
8343
8344 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8345
8346#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8347 /*
8348 * If there are any stack arguments, make sure they are in their place as well.
8349 *
8350 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8351 * the caller) will be loading it later and it must be free (see the first loop).
8352 */
8353 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8354 {
8355 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8356 {
8357 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8358 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8359 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8360 {
8361 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8362 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8363 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8364 pVar->idxReg = UINT8_MAX;
8365 }
8366 else
8367 {
8368 /* Use ARG0 as temp for stuff we need registers for. */
8369 switch (pVar->enmKind)
8370 {
8371 case kIemNativeVarKind_Stack:
8372 {
8373 uint8_t const idxStackSlot = pVar->idxStackSlot;
8374 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8375 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8376 iemNativeStackCalcBpDisp(idxStackSlot));
8377 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8378 continue;
8379 }
8380
8381 case kIemNativeVarKind_Immediate:
8382 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8383 continue;
8384
8385 case kIemNativeVarKind_VarRef:
8386 {
8387 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8388 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8389 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8390 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8391 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8392# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8393 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8394 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8395 if ( fSimdReg
8396 && idxRegOther != UINT8_MAX)
8397 {
8398 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8399 if (cbVar == sizeof(RTUINT128U))
8400 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8401 else
8402 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8403 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8404 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8405 }
8406 else
8407# endif
8408 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8409 {
8410 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8411 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8412 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8413 }
8414 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8415 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8416 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8417 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8418 continue;
8419 }
8420
8421 case kIemNativeVarKind_GstRegRef:
8422 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8423 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8424 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8425 continue;
8426
8427 case kIemNativeVarKind_Invalid:
8428 case kIemNativeVarKind_End:
8429 break;
8430 }
8431 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8432 }
8433 }
8434# if 0 //def VBOX_STRICT
8435 iemNativeRegAssertSanity(pReNative);
8436# endif
8437 }
8438#else
8439 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8440#endif
8441
8442 /*
8443 * Make sure the argument variables are loaded into their respective registers.
8444 *
8445 * We can optimize this by ASSUMING that any register allocations are for
8446 * registers that have already been loaded and are ready. The previous step
8447 * saw to that.
8448 */
8449 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8450 {
8451 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8452 {
8453 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8454 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8455 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8456 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8457 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8458 else
8459 {
8460 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8461 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8462 {
8463 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8464 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8465 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8466 | RT_BIT_32(idxArgReg);
8467 pVar->idxReg = idxArgReg;
8468 }
8469 else
8470 {
8471 /* Use ARG0 as temp for stuff we need registers for. */
8472 switch (pVar->enmKind)
8473 {
8474 case kIemNativeVarKind_Stack:
8475 {
8476 uint8_t const idxStackSlot = pVar->idxStackSlot;
8477 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8478 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8479 continue;
8480 }
8481
8482 case kIemNativeVarKind_Immediate:
8483 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8484 continue;
8485
8486 case kIemNativeVarKind_VarRef:
8487 {
8488 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8489 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8490 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8491 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8492 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8493 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8494#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8495 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8496 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8497 if ( fSimdReg
8498 && idxRegOther != UINT8_MAX)
8499 {
8500 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8501 if (cbVar == sizeof(RTUINT128U))
8502 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8503 else
8504 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8505 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8506 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8507 }
8508 else
8509#endif
8510 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8511 {
8512 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8513 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8514 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8515 }
8516 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8517 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8518 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8519 continue;
8520 }
8521
8522 case kIemNativeVarKind_GstRegRef:
8523 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8524 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8525 continue;
8526
8527 case kIemNativeVarKind_Invalid:
8528 case kIemNativeVarKind_End:
8529 break;
8530 }
8531 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8532 }
8533 }
8534 }
8535#if 0 //def VBOX_STRICT
8536 iemNativeRegAssertSanity(pReNative);
8537#endif
8538 }
8539#ifdef VBOX_STRICT
8540 else
8541 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8542 {
8543 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8544 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8545 }
8546#endif
8547
8548 /*
8549 * Free all argument variables (simplified).
8550 * Their lifetime always expires with the call they are for.
8551 */
8552 /** @todo Make the python script check that arguments aren't used after
8553 * IEM_MC_CALL_XXXX. */
8554 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8555 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8556 * an argument value. There is also some FPU stuff. */
8557 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8558 {
8559 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8560 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8561
8562 /* no need to free registers: */
8563 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8564 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8565 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8566 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8567 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8568 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8569
8570 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8571 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8572 iemNativeVarFreeStackSlots(pReNative, idxVar);
8573 }
8574 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8575
8576 /*
8577 * Flush volatile registers as we make the call.
8578 */
8579 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8580
8581 return off;
8582}
8583
8584
8585
8586/*********************************************************************************************************************************
8587* TLB Lookup. *
8588*********************************************************************************************************************************/
8589
8590/**
8591 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8592 */
8593DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8594{
8595 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8596 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8597 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8598 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8599
8600 /* Do the lookup manually. */
8601 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8602 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8603 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8604 if (RT_LIKELY(pTlbe->uTag == uTag))
8605 {
8606 /*
8607 * Check TLB page table level access flags.
8608 */
8609 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8610 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8611 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8612 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8613 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8614 | IEMTLBE_F_PG_UNASSIGNED
8615 | IEMTLBE_F_PT_NO_ACCESSED
8616 | fNoWriteNoDirty | fNoUser);
8617 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8618 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8619 {
8620 /*
8621 * Return the address.
8622 */
8623 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8624 if ((uintptr_t)pbAddr == uResult)
8625 return;
8626 RT_NOREF(cbMem);
8627 AssertFailed();
8628 }
8629 else
8630 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8631 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8632 }
8633 else
8634 AssertFailed();
8635 RT_BREAKPOINT();
8636}
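/*
 * Illustrative sketch, not part of the original source: uSegAndSizeAndAccess as unpacked above is
 * a simple packing of the segment register (byte 0), the access size (byte 1) and the
 * IEM_ACCESS_XXX flags (the upper 16 bits).  A caller-side packing helper could look like the
 * following; the function name is made up for illustration only, the real emitter packs the value
 * inline when setting up the iemNativeHlpAsmSafeWrapCheckTlbLookup call.
 */
#if 0
static uint32_t iemNativeExamplePackTlbCheckArg(uint8_t iSegReg, uint8_t cbMem, uint32_t fAccess)
{
    Assert(fAccess <= UINT16_MAX);      /* must fit into the upper 16 bits */
    return (fAccess << 16) | ((uint32_t)cbMem << 8) | iSegReg;
}
#endif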
8637
8638/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8639
8640
8641
8642/*********************************************************************************************************************************
8643* Recompiler Core. *
8644*********************************************************************************************************************************/
8645
8646/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8647static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8648{
8649 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8650 pDis->cbCachedInstr += cbMaxRead;
8651 RT_NOREF(cbMinRead);
8652 return VERR_NO_DATA;
8653}
8654
8655
8656DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8657{
8658 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8659 {
8660#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8661 ENTRY(fLocalForcedActions),
8662 ENTRY(iem.s.rcPassUp),
8663 ENTRY(iem.s.fExec),
8664 ENTRY(iem.s.pbInstrBuf),
8665 ENTRY(iem.s.uInstrBufPc),
8666 ENTRY(iem.s.GCPhysInstrBuf),
8667 ENTRY(iem.s.cbInstrBufTotal),
8668 ENTRY(iem.s.idxTbCurInstr),
8669#ifdef VBOX_WITH_STATISTICS
8670 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8671 ENTRY(iem.s.StatNativeTlbHitsForStore),
8672 ENTRY(iem.s.StatNativeTlbHitsForStack),
8673 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8674 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8675 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8676 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8677 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8678#endif
8679 ENTRY(iem.s.DataTlb.aEntries),
8680 ENTRY(iem.s.DataTlb.uTlbRevision),
8681 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8682 ENTRY(iem.s.DataTlb.cTlbHits),
8683 ENTRY(iem.s.CodeTlb.aEntries),
8684 ENTRY(iem.s.CodeTlb.uTlbRevision),
8685 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8686 ENTRY(iem.s.CodeTlb.cTlbHits),
8687 ENTRY(pVMR3),
8688 ENTRY(cpum.GstCtx.rax),
8689 ENTRY(cpum.GstCtx.ah),
8690 ENTRY(cpum.GstCtx.rcx),
8691 ENTRY(cpum.GstCtx.ch),
8692 ENTRY(cpum.GstCtx.rdx),
8693 ENTRY(cpum.GstCtx.dh),
8694 ENTRY(cpum.GstCtx.rbx),
8695 ENTRY(cpum.GstCtx.bh),
8696 ENTRY(cpum.GstCtx.rsp),
8697 ENTRY(cpum.GstCtx.rbp),
8698 ENTRY(cpum.GstCtx.rsi),
8699 ENTRY(cpum.GstCtx.rdi),
8700 ENTRY(cpum.GstCtx.r8),
8701 ENTRY(cpum.GstCtx.r9),
8702 ENTRY(cpum.GstCtx.r10),
8703 ENTRY(cpum.GstCtx.r11),
8704 ENTRY(cpum.GstCtx.r12),
8705 ENTRY(cpum.GstCtx.r13),
8706 ENTRY(cpum.GstCtx.r14),
8707 ENTRY(cpum.GstCtx.r15),
8708 ENTRY(cpum.GstCtx.es.Sel),
8709 ENTRY(cpum.GstCtx.es.u64Base),
8710 ENTRY(cpum.GstCtx.es.u32Limit),
8711 ENTRY(cpum.GstCtx.es.Attr),
8712 ENTRY(cpum.GstCtx.cs.Sel),
8713 ENTRY(cpum.GstCtx.cs.u64Base),
8714 ENTRY(cpum.GstCtx.cs.u32Limit),
8715 ENTRY(cpum.GstCtx.cs.Attr),
8716 ENTRY(cpum.GstCtx.ss.Sel),
8717 ENTRY(cpum.GstCtx.ss.u64Base),
8718 ENTRY(cpum.GstCtx.ss.u32Limit),
8719 ENTRY(cpum.GstCtx.ss.Attr),
8720 ENTRY(cpum.GstCtx.ds.Sel),
8721 ENTRY(cpum.GstCtx.ds.u64Base),
8722 ENTRY(cpum.GstCtx.ds.u32Limit),
8723 ENTRY(cpum.GstCtx.ds.Attr),
8724 ENTRY(cpum.GstCtx.fs.Sel),
8725 ENTRY(cpum.GstCtx.fs.u64Base),
8726 ENTRY(cpum.GstCtx.fs.u32Limit),
8727 ENTRY(cpum.GstCtx.fs.Attr),
8728 ENTRY(cpum.GstCtx.gs.Sel),
8729 ENTRY(cpum.GstCtx.gs.u64Base),
8730 ENTRY(cpum.GstCtx.gs.u32Limit),
8731 ENTRY(cpum.GstCtx.gs.Attr),
8732 ENTRY(cpum.GstCtx.rip),
8733 ENTRY(cpum.GstCtx.eflags),
8734 ENTRY(cpum.GstCtx.uRipInhibitInt),
8735 ENTRY(cpum.GstCtx.cr0),
8736 ENTRY(cpum.GstCtx.cr4),
8737 ENTRY(cpum.GstCtx.aXcr[0]),
8738 ENTRY(cpum.GstCtx.aXcr[1]),
8739#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8740 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8741 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8742 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8743 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8744 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8745 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8746 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8747 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8748 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8749 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8750 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8751 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8752 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8753 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8754 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8755 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8756 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8757 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8758 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8759 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8760 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8761 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8762 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8763 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8764 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8765 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8766 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8767 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8768 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8769 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8770 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8771 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8772#endif
8773#undef ENTRY
8774 };
8775#ifdef VBOX_STRICT
8776 static bool s_fOrderChecked = false;
8777 if (!s_fOrderChecked)
8778 {
8779 s_fOrderChecked = true;
8780 uint32_t offPrev = s_aMembers[0].off;
8781 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8782 {
8783 Assert(s_aMembers[i].off > offPrev);
8784 offPrev = s_aMembers[i].off;
8785 }
8786 }
8787#endif
8788
8789 /*
8790 * Binary lookup.
8791 */
8792 unsigned iStart = 0;
8793 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8794 for (;;)
8795 {
8796 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8797 uint32_t const offCur = s_aMembers[iCur].off;
8798 if (off < offCur)
8799 {
8800 if (iCur != iStart)
8801 iEnd = iCur;
8802 else
8803 break;
8804 }
8805 else if (off > offCur)
8806 {
8807 if (iCur + 1 < iEnd)
8808 iStart = iCur + 1;
8809 else
8810 break;
8811 }
8812 else
8813 return s_aMembers[iCur].pszName;
8814 }
8815#ifdef VBOX_WITH_STATISTICS
8816 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8817 return "iem.s.acThreadedFuncStats[iFn]";
8818#endif
8819 return NULL;
8820}
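/*
 * Usage sketch, not part of the original source: the binary lookup above only returns a name for
 * an exact member offset into VMCPUCC (the table is kept in ascending offset order, as asserted
 * in strict builds) and NULL otherwise, e.g.:
 *
 *     iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));     // -> "cpum.GstCtx.rip"
 *     iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip) + 1); // -> NULL, no exact match
 *
 * The disassembly code below uses it to annotate vCPU-relative memory operands.
 */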
8821
8822
8823DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8824{
8825 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8826#if defined(RT_ARCH_AMD64)
8827 static const char * const a_apszMarkers[] =
8828 {
8829 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8830 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8831 };
8832#endif
8833
8834 char szDisBuf[512];
8835 DISSTATE Dis;
8836 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8837 uint32_t const cNative = pTb->Native.cInstructions;
8838 uint32_t offNative = 0;
8839#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8840 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8841#endif
8842 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8843 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8844 : DISCPUMODE_64BIT;
8845#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8846 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8847#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8848 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8849#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8850# error "Port me"
8851#else
8852 csh hDisasm = ~(size_t)0;
8853# if defined(RT_ARCH_AMD64)
8854 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8855# elif defined(RT_ARCH_ARM64)
8856 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8857# else
8858# error "Port me"
8859# endif
8860 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8861
8862 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8863 //Assert(rcCs == CS_ERR_OK);
8864#endif
8865
8866 /*
8867 * Print TB info.
8868 */
8869 pHlp->pfnPrintf(pHlp,
8870 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8871 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8872 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8873 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8874#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8875 if (pDbgInfo && pDbgInfo->cEntries > 1)
8876 {
8877 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8878
8879 /*
8880 * This disassembly is driven by the debug info, which follows the native
8881 * code and indicates where the next guest instruction starts, where the
8882 * labels are, and similar details.
8883 */
8884 uint32_t idxThreadedCall = 0;
8885 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8886 uint8_t idxRange = UINT8_MAX;
8887 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8888 uint32_t offRange = 0;
8889 uint32_t offOpcodes = 0;
8890 uint32_t const cbOpcodes = pTb->cbOpcodes;
8891 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8892 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8893 uint32_t iDbgEntry = 1;
8894 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8895
8896 while (offNative < cNative)
8897 {
8898 /* If we're at or have passed the point where the next chunk of debug
8899 info starts, process it. */
8900 if (offDbgNativeNext <= offNative)
8901 {
8902 offDbgNativeNext = UINT32_MAX;
8903 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8904 {
8905 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8906 {
8907 case kIemTbDbgEntryType_GuestInstruction:
8908 {
8909 /* Did the exec flag change? */
8910 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8911 {
8912 pHlp->pfnPrintf(pHlp,
8913 " fExec change %#08x -> %#08x %s\n",
8914 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8915 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8916 szDisBuf, sizeof(szDisBuf)));
8917 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8918 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8919 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8920 : DISCPUMODE_64BIT;
8921 }
8922
8923 /* New opcode range? We need to fend off a spurious debug info entry here for cases
8924 where the compilation was aborted before the opcode was recorded and the actual
8925 instruction was translated to a threaded call. This may happen when we run out
8926 of ranges, or when some complicated interrupts/FFs are found to be pending or
8927 similar. So, we just deal with it here rather than in the compiler code as it
8928 is a lot simpler to do here. */
8929 if ( idxRange == UINT8_MAX
8930 || idxRange >= cRanges
8931 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8932 {
8933 idxRange += 1;
8934 if (idxRange < cRanges)
8935 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8936 else
8937 continue;
8938 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8939 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8940 + (pTb->aRanges[idxRange].idxPhysPage == 0
8941 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8942 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8943 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8944 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8945 pTb->aRanges[idxRange].idxPhysPage);
8946 GCPhysPc += offRange;
8947 }
8948
8949 /* Disassemble the instruction. */
8950 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8951 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8952 uint32_t cbInstr = 1;
8953 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8954 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8955 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8956 if (RT_SUCCESS(rc))
8957 {
8958 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8959 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8960 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8961 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8962
8963 static unsigned const s_offMarker = 55;
8964 static char const s_szMarker[] = " ; <--- guest";
8965 if (cch < s_offMarker)
8966 {
8967 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8968 cch = s_offMarker;
8969 }
8970 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8971 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8972
8973 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8974 }
8975 else
8976 {
8977 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8978 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8979 cbInstr = 1;
8980 }
8981 GCPhysPc += cbInstr;
8982 offOpcodes += cbInstr;
8983 offRange += cbInstr;
8984 continue;
8985 }
8986
8987 case kIemTbDbgEntryType_ThreadedCall:
8988 pHlp->pfnPrintf(pHlp,
8989 " Call #%u to %s (%u args) - %s\n",
8990 idxThreadedCall,
8991 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8992 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8993 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8994 idxThreadedCall++;
8995 continue;
8996
8997 case kIemTbDbgEntryType_GuestRegShadowing:
8998 {
8999 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9000 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9001 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9002 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9003 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9004 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9005 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
9006 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9007 else
9008 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9009 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9010 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9011 continue;
9012 }
9013
9014#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9015 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9016 {
9017 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9018 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9019 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9020 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9021 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9022 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9023 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9024 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9025 else
9026 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9027 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9028 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9029 continue;
9030 }
9031#endif
9032
9033 case kIemTbDbgEntryType_Label:
9034 {
9035 const char *pszName = "what_the_fudge";
9036 const char *pszComment = "";
9037 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9038 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9039 {
9040 case kIemNativeLabelType_Return: pszName = "Return"; break;
9041 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9042 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9043 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9044 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9045 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9046 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9047 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9048 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9049 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9050 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9051 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9052 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9053 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9054 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9055 case kIemNativeLabelType_If:
9056 pszName = "If";
9057 fNumbered = true;
9058 break;
9059 case kIemNativeLabelType_Else:
9060 pszName = "Else";
9061 fNumbered = true;
9062 pszComment = " ; regs state restored pre-if-block";
9063 break;
9064 case kIemNativeLabelType_Endif:
9065 pszName = "Endif";
9066 fNumbered = true;
9067 break;
9068 case kIemNativeLabelType_CheckIrq:
9069 pszName = "CheckIrq_CheckVM";
9070 fNumbered = true;
9071 break;
9072 case kIemNativeLabelType_TlbLookup:
9073 pszName = "TlbLookup";
9074 fNumbered = true;
9075 break;
9076 case kIemNativeLabelType_TlbMiss:
9077 pszName = "TlbMiss";
9078 fNumbered = true;
9079 break;
9080 case kIemNativeLabelType_TlbDone:
9081 pszName = "TlbDone";
9082 fNumbered = true;
9083 break;
9084 case kIemNativeLabelType_Invalid:
9085 case kIemNativeLabelType_End:
9086 break;
9087 }
9088 if (fNumbered)
9089 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9090 else
9091 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9092 continue;
9093 }
9094
9095 case kIemTbDbgEntryType_NativeOffset:
9096 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9097 Assert(offDbgNativeNext > offNative);
9098 break;
9099
9100#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9101 case kIemTbDbgEntryType_DelayedPcUpdate:
9102 pHlp->pfnPrintf(pHlp,
9103 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9104 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9105 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9106 continue;
9107#endif
9108
9109 default:
9110 AssertFailed();
9111 }
9112 iDbgEntry++;
9113 break;
9114 }
9115 }
9116
9117 /*
9118 * Disassemble the next native instruction.
9119 */
9120 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9121# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9122 uint32_t cbInstr = sizeof(paNative[0]);
9123 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9124 if (RT_SUCCESS(rc))
9125 {
9126# if defined(RT_ARCH_AMD64)
9127 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9128 {
9129 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9130 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9131 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9132 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9133 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9134 uInfo & 0x8000 ? "recompiled" : "todo");
9135 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9136 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9137 else
9138 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9139 }
9140 else
9141# endif
9142 {
9143 const char *pszAnnotation = NULL;
9144# ifdef RT_ARCH_AMD64
9145 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9146 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9147 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9148 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9149 PCDISOPPARAM pMemOp;
9150 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9151 pMemOp = &Dis.Param1;
9152 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9153 pMemOp = &Dis.Param2;
9154 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9155 pMemOp = &Dis.Param3;
9156 else
9157 pMemOp = NULL;
9158 if ( pMemOp
9159 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9160 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9161 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9162 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9163
9164#elif defined(RT_ARCH_ARM64)
9165 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9166 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9167 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9168# else
9169# error "Port me"
9170# endif
9171 if (pszAnnotation)
9172 {
9173 static unsigned const s_offAnnotation = 55;
9174 size_t const cchAnnotation = strlen(pszAnnotation);
9175 size_t cchDis = strlen(szDisBuf);
9176 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9177 {
9178 if (cchDis < s_offAnnotation)
9179 {
9180 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9181 cchDis = s_offAnnotation;
9182 }
9183 szDisBuf[cchDis++] = ' ';
9184 szDisBuf[cchDis++] = ';';
9185 szDisBuf[cchDis++] = ' ';
9186 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9187 }
9188 }
9189 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9190 }
9191 }
9192 else
9193 {
9194# if defined(RT_ARCH_AMD64)
9195 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9196 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9197# elif defined(RT_ARCH_ARM64)
9198 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9199# else
9200# error "Port me"
9201# endif
9202 cbInstr = sizeof(paNative[0]);
9203 }
9204 offNative += cbInstr / sizeof(paNative[0]);
9205
9206# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9207 cs_insn *pInstr;
9208 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9209 (uintptr_t)pNativeCur, 1, &pInstr);
9210 if (cInstrs > 0)
9211 {
9212 Assert(cInstrs == 1);
9213 const char *pszAnnotation = NULL;
9214# if defined(RT_ARCH_ARM64)
9215 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9216 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9217 {
9218 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9219 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9220 char *psz = strchr(pInstr->op_str, '[');
9221 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9222 {
9223 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9224 int32_t off = -1;
9225 psz += 4;
9226 if (*psz == ']')
9227 off = 0;
9228 else if (*psz == ',')
9229 {
9230 psz = RTStrStripL(psz + 1);
9231 if (*psz == '#')
9232 off = RTStrToInt32(&psz[1]);
9233 /** @todo deal with index registers and LSL as well... */
9234 }
9235 if (off >= 0)
9236 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9237 }
9238 }
9239# endif
9240
9241 size_t const cchOp = strlen(pInstr->op_str);
9242# if defined(RT_ARCH_AMD64)
9243 if (pszAnnotation)
9244 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9245 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9246 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9247 else
9248 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9249 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9250
9251# else
9252 if (pszAnnotation)
9253 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9254 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9255 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9256 else
9257 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9258 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9259# endif
9260 offNative += pInstr->size / sizeof(*pNativeCur);
9261 cs_free(pInstr, cInstrs);
9262 }
9263 else
9264 {
9265# if defined(RT_ARCH_AMD64)
9266 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9267 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9268# else
9269 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9270# endif
9271 offNative++;
9272 }
9273# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9274 }
9275 }
9276 else
9277#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9278 {
9279 /*
9280 * No debug info, just disassemble the x86 code and then the native code.
9281 *
9282 * First the guest code:
9283 */
9284 for (unsigned i = 0; i < pTb->cRanges; i++)
9285 {
9286 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9287 + (pTb->aRanges[i].idxPhysPage == 0
9288 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9289 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9290 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9291 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9292 unsigned off = pTb->aRanges[i].offOpcodes;
9293 /** @todo this ain't working when crossing pages! */
9294 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9295 while (off < cbOpcodes)
9296 {
9297 uint32_t cbInstr = 1;
9298 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9299 &pTb->pabOpcodes[off], cbOpcodes - off,
9300 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9301 if (RT_SUCCESS(rc))
9302 {
9303 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9304 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9305 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9306 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9307 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9308 GCPhysPc += cbInstr;
9309 off += cbInstr;
9310 }
9311 else
9312 {
9313 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9314 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9315 break;
9316 }
9317 }
9318 }
9319
9320 /*
9321 * Then the native code:
9322 */
9323 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9324 while (offNative < cNative)
9325 {
9326 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9327# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9328 uint32_t cbInstr = sizeof(paNative[0]);
9329 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9330 if (RT_SUCCESS(rc))
9331 {
9332# if defined(RT_ARCH_AMD64)
9333 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9334 {
9335 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9336 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9337 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9338 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9339 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9340 uInfo & 0x8000 ? "recompiled" : "todo");
9341 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9342 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9343 else
9344 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9345 }
9346 else
9347# endif
9348 {
9349# ifdef RT_ARCH_AMD64
9350 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9351 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9352 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9353 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9354# elif defined(RT_ARCH_ARM64)
9355 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9356 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9357 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9358# else
9359# error "Port me"
9360# endif
9361 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9362 }
9363 }
9364 else
9365 {
9366# if defined(RT_ARCH_AMD64)
9367 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9368 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9369# else
9370 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9371# endif
9372 cbInstr = sizeof(paNative[0]);
9373 }
9374 offNative += cbInstr / sizeof(paNative[0]);
9375
9376# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9377 cs_insn *pInstr;
9378 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9379 (uintptr_t)pNativeCur, 1, &pInstr);
9380 if (cInstrs > 0)
9381 {
9382 Assert(cInstrs == 1);
9383# if defined(RT_ARCH_AMD64)
9384 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9385 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9386# else
9387 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9388 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9389# endif
9390 offNative += pInstr->size / sizeof(*pNativeCur);
9391 cs_free(pInstr, cInstrs);
9392 }
9393 else
9394 {
9395# if defined(RT_ARCH_AMD64)
9396 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9397 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9398# else
9399 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9400# endif
9401 offNative++;
9402 }
9403# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9404 }
9405 }
9406
9407#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9408 /* Cleanup. */
9409 cs_close(&hDisasm);
9410#endif
9411}
9412
9413
9414/**
9415 * Recompiles the given threaded TB into a native one.
9416 *
9417 * In case of failure the translation block will be returned as-is.
9418 *
9419 * @returns pTb.
9420 * @param pVCpu The cross context virtual CPU structure of the calling
9421 * thread.
9422 * @param pTb The threaded translation block to recompile to native.
9423 */
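/*
 * Added summary, not from the original source - rough roadmap of the steps below:
 *   1. Allocate or re-initialize the per-VCpu recompiler state.
 *   2. Optionally run the backwards liveness analysis over the threaded calls.
 *   3. Under setjmp/longjmp protection: emit the prolog, convert each threaded call either via its
 *      native recompiler function or as a plain threaded-call dispatch, then emit the epilog and
 *      the shared tail labels.
 *   4. Verify all labels are defined, allocate executable memory, copy the code over and apply the
 *      label fixups.
 *   5. Flip the TB from threaded to native, adopt the debug info, and optionally disassemble it to
 *      the log.
 */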
9424DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9425{
9426 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9427
9428 /*
9429 * The first time through, we allocate the recompiler state; the other times
9430 * we just need to reset it before using it again.
9431 */
9432 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9433 if (RT_LIKELY(pReNative))
9434 iemNativeReInit(pReNative, pTb);
9435 else
9436 {
9437 pReNative = iemNativeInit(pVCpu, pTb);
9438 AssertReturn(pReNative, pTb);
9439 }
9440
9441#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9442 /*
9443 * First do liveness analysis. This is done backwards.
9444 */
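     /* Added note, not from the original source: working from the last call towards the first, each
        step below is essentially

            paLivenessEntries[idxCall - 1] = transfer(paCalls[idxCall], paLivenessEntries[idxCall]);

        so that entry i ends up describing what calls i+1 onwards still need.  The idea is that the
        recompiler can consult that entry while emitting call i and skip updates (EFLAGS in
        particular) whose results no later call consumes. */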
9445 {
9446 uint32_t idxCall = pTb->Thrd.cCalls;
9447 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9448 { /* likely */ }
9449 else
9450 {
9451 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9452 while (idxCall > cAlloc)
9453 cAlloc *= 2;
9454 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9455 AssertReturn(pvNew, pTb);
9456 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9457 pReNative->cLivenessEntriesAlloc = cAlloc;
9458 }
9459 AssertReturn(idxCall > 0, pTb);
9460 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9461
9462 /* The initial (final) entry. */
9463 idxCall--;
9464 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9465
9466 /* Loop backwards through the calls and fill in the other entries. */
9467 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9468 while (idxCall > 0)
9469 {
9470 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9471 if (pfnLiveness)
9472 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9473 else
9474 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9475 pCallEntry--;
9476 idxCall--;
9477 }
9478
9479# ifdef VBOX_WITH_STATISTICS
9480 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9481 to 'clobbered' rather than 'input'. */
9482 /** @todo */
9483# endif
9484 }
9485#endif
9486
9487 /*
9488 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9489 * for aborting if an error happens.
9490 */
9491 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9492#ifdef LOG_ENABLED
9493 uint32_t const cCallsOrg = cCallsLeft;
9494#endif
9495 uint32_t off = 0;
9496 int rc = VINF_SUCCESS;
9497 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9498 {
9499 /*
9500 * Emit prolog code (fixed).
9501 */
9502 off = iemNativeEmitProlog(pReNative, off);
9503
9504 /*
9505 * Convert the calls to native code.
9506 */
9507#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9508 int32_t iGstInstr = -1;
9509#endif
9510#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9511 uint32_t cThreadedCalls = 0;
9512 uint32_t cRecompiledCalls = 0;
9513#endif
9514#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9515 uint32_t idxCurCall = 0;
9516#endif
9517 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9518 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9519 while (cCallsLeft-- > 0)
9520 {
9521 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9522#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9523 pReNative->idxCurCall = idxCurCall;
9524#endif
9525
9526 /*
9527 * Debug info, assembly markup and statistics.
9528 */
9529#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9530 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9531 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9532#endif
9533#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9534 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9535 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9536 {
9537 if (iGstInstr < (int32_t)pTb->cInstructions)
9538 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9539 else
9540 Assert(iGstInstr == pTb->cInstructions);
9541 iGstInstr = pCallEntry->idxInstr;
9542 }
9543 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9544#endif
9545#if defined(VBOX_STRICT)
9546 off = iemNativeEmitMarker(pReNative, off,
9547 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9548#endif
9549#if defined(VBOX_STRICT)
9550 iemNativeRegAssertSanity(pReNative);
9551#endif
9552#ifdef VBOX_WITH_STATISTICS
9553 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9554#endif
9555
9556 /*
9557 * Actual work.
9558 */
9559 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9560 pfnRecom ? "(recompiled)" : "(todo)"));
9561 if (pfnRecom) /** @todo stats on this. */
9562 {
9563 off = pfnRecom(pReNative, off, pCallEntry);
9564 STAM_REL_STATS({cRecompiledCalls++;});
9565 }
9566 else
9567 {
9568 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9569 STAM_REL_STATS({cThreadedCalls++;});
9570 }
9571 Assert(off <= pReNative->cInstrBufAlloc);
9572 Assert(pReNative->cCondDepth == 0);
9573
9574#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9575 if (LogIs2Enabled())
9576 {
9577 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9578# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9579 static const char s_achState[] = "CUXI";
9580# else
9581 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9582# endif
9583
9584 char szGpr[17];
9585 for (unsigned i = 0; i < 16; i++)
9586 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9587 szGpr[16] = '\0';
9588
9589 char szSegBase[X86_SREG_COUNT + 1];
9590 char szSegLimit[X86_SREG_COUNT + 1];
9591 char szSegAttrib[X86_SREG_COUNT + 1];
9592 char szSegSel[X86_SREG_COUNT + 1];
9593 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9594 {
9595 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9596 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9597 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9598 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9599 }
9600 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9601 = szSegSel[X86_SREG_COUNT] = '\0';
9602
9603 char szEFlags[8];
9604 for (unsigned i = 0; i < 7; i++)
9605 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9606 szEFlags[7] = '\0';
9607
9608 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9609 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9610 }
9611#endif
9612
9613 /*
9614 * Advance.
9615 */
9616 pCallEntry++;
9617#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9618 idxCurCall++;
9619#endif
9620 }
9621
9622 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9623 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9624 if (!cThreadedCalls)
9625 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9626
9627 /*
9628 * Emit the epilog code.
9629 */
9630 uint32_t idxReturnLabel;
9631 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9632
9633 /*
9634 * Generate special jump labels.
9635 */
9636 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9637 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9638 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9639 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9640
9641 /*
9642 * Generate simple TB tail labels that just call a helper with a pVCpu
9643 * arg and either return or longjmp/throw a non-zero status.
9644 *
9645 * The array entries must be ordered by enmLabel value so we can index
9646 * using fTailLabels bit numbers.
9647 */
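         /* Illustrative aside, not from the original source: the loop below uses the usual IPRT
            set-bit iteration idiom, which in isolation looks like

                while (fTailLabels)
                {
                    unsigned const iBit = ASMBitFirstSetU64(fTailLabels) - 1;   /* 1-based return value */
                    fTailLabels &= ~RT_BIT_64(iBit);
                    /* ... handle bit number iBit, here an IEMNATIVELABELTYPE value ... */
                }

            and because g_aSimpleTailLabels is ordered by enmLabel, the bit number doubles as the
            array index. */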
9648 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9649 static struct
9650 {
9651 IEMNATIVELABELTYPE enmLabel;
9652 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9653 } const g_aSimpleTailLabels[] =
9654 {
9655 { kIemNativeLabelType_Invalid, NULL },
9656 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9657 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9658 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9659 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9660 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9661 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9662 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9663 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9664 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9665 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9666 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9667 };
9668 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9669 AssertCompile(kIemNativeLabelType_Invalid == 0);
9670 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9671 if (fTailLabels)
9672 {
9673 do
9674 {
9675 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9676 fTailLabels &= ~RT_BIT_64(enmLabel);
9677 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9678
9679 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9680 Assert(idxLabel != UINT32_MAX);
9681 if (idxLabel != UINT32_MAX)
9682 {
9683 iemNativeLabelDefine(pReNative, idxLabel, off);
9684
9685 /* int pfnCallback(PVMCPUCC pVCpu) */
9686 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9687 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9688
9689 /* jump back to the return sequence. */
9690 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9691 }
9692
9693 } while (fTailLabels);
9694 }
9695 }
9696 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9697 {
9698 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9699 return pTb;
9700 }
9701 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9702 Assert(off <= pReNative->cInstrBufAlloc);
9703
9704 /*
9705 * Make sure all labels have been defined.
9706 */
9707 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9708#ifdef VBOX_STRICT
9709 uint32_t const cLabels = pReNative->cLabels;
9710 for (uint32_t i = 0; i < cLabels; i++)
9711 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9712#endif
9713
9714 /*
9715 * Allocate executable memory, copy over the code we've generated.
9716 */
9717 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9718 if (pTbAllocator->pDelayedFreeHead)
9719 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9720
9721 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9722 AssertReturn(paFinalInstrBuf, pTb);
9723 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9724
9725 /*
9726 * Apply fixups.
9727 */
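    /* Worked example, not from the original source: for an AMD64 kIemNativeFixupType_Rel32, a
       label at instruction buffer offset 0x120 referenced from a fixup at offset 0x100 with an
       offAddend of -4 (the usual value when the rel32 sits at the end of the instruction) stores
       0x120 - 0x100 + (-4) = 0x1c as the displacement.  The ARM64 cases below work the same way,
       just in 4-byte instruction units, patching the displacement into the immediate field of the
       already emitted instruction word. */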
9728 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9729 uint32_t const cFixups = pReNative->cFixups;
9730 for (uint32_t i = 0; i < cFixups; i++)
9731 {
9732 Assert(paFixups[i].off < off);
9733 Assert(paFixups[i].idxLabel < cLabels);
9734 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9735 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9736 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9737 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9738 switch (paFixups[i].enmType)
9739 {
9740#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9741 case kIemNativeFixupType_Rel32:
9742 Assert(paFixups[i].off + 4 <= off);
9743 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9744 continue;
9745
9746#elif defined(RT_ARCH_ARM64)
9747 case kIemNativeFixupType_RelImm26At0:
9748 {
9749 Assert(paFixups[i].off < off);
9750 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9751 Assert(offDisp >= -262144 && offDisp < 262144);
9752 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9753 continue;
9754 }
9755
9756 case kIemNativeFixupType_RelImm19At5:
9757 {
9758 Assert(paFixups[i].off < off);
9759 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9760 Assert(offDisp >= -262144 && offDisp < 262144);
9761 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9762 continue;
9763 }
9764
9765 case kIemNativeFixupType_RelImm14At5:
9766 {
9767 Assert(paFixups[i].off < off);
9768 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9769 Assert(offDisp >= -8192 && offDisp < 8192);
9770 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9771 continue;
9772 }
9773
9774#endif
9775 case kIemNativeFixupType_Invalid:
9776 case kIemNativeFixupType_End:
9777 break;
9778 }
9779 AssertFailed();
9780 }
9781
9782 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9783 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9784
9785 /*
9786 * Convert the translation block.
9787 */
9788 RTMemFree(pTb->Thrd.paCalls);
9789 pTb->Native.paInstructions = paFinalInstrBuf;
9790 pTb->Native.cInstructions = off;
9791 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9792#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9793 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9794 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9795#endif
9796
9797 Assert(pTbAllocator->cThreadedTbs > 0);
9798 pTbAllocator->cThreadedTbs -= 1;
9799 pTbAllocator->cNativeTbs += 1;
9800 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9801
9802#ifdef LOG_ENABLED
9803 /*
9804 * Disassemble to the log if enabled.
9805 */
9806 if (LogIs3Enabled())
9807 {
9808 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9809 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9810# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9811 RTLogFlush(NULL);
9812# endif
9813 }
9814#endif
9815 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9816
9817 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9818 return pTb;
9819}
9820