VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103865

Last change on this file since 103865 was 103865, checked in by vboxsync, 11 months ago

VMM/IEM: Implement native emitter for IEM_MC_CALL_SSE_AIMPL_2()/IEM_MC_CALL_SSE_AIMPL_3(), bugref:10614

1/* $Id: IEMAllN8veRecompiler.cpp 103865 2024-03-15 11:56:15Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using the RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
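/* Editor's note (illustrative, not part of the original source): the two values must stay in
   sync, i.e. IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == 1 << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT,
   so e.g. a 300 byte request maps to (300 + 127) >> 7 = 3 units, i.e. 384 bytes. */
#if 0 /* compile-time sanity check sketch */
AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));
#endif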
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity/laziness, they are allocated as one continuous
339 * chunk. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
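/*
 * Editor's illustration (a sketch, not part of the original source): how the scan above
 * behaves on a toy bitmap when two consecutive units are requested.  The function name
 * and the values are made up for the example.
 */
#if 0
static void iemExecMemAllocatorScanExample(void)
{
    uint64_t bmToy[1] = { UINT64_C(0x000000000000000d) }; /* 0b001101: units 0, 2 and 3 taken. */
    int      iBit     = ASMBitFirstClear(bmToy, 64);      /* -> 1, but unit 2 is set, so a     */
    iBit = ASMBitNextClear(bmToy, 64, iBit);              /* 2-unit request resumes here -> 4. */
    ASMBitSetRange(bmToy, iBit, iBit + 2);                /* claim units 4 and 5.              */
}
#endif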
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
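/*
 * Editor's illustration (a sketch, not part of the original source): the intended call
 * sequence on hosts with W^X restrictions - allocate, emit the code while the pages are
 * read+write, then flip them to read+exec (and flush the icache) before executing.
 * The wrapper function name and the cbCode parameter are made up for the example.
 */
#if 0
static void iemExecMemAllocatorUsageExample(PVMCPUCC pVCpu, uint32_t cbCode)
{
    void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode);
    if (pv)
    {
        /* ... emit native instructions into pv while it is still writable ... */
        iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);
    }
}
#endif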
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here, both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
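/*
 * Editor's worked example (a sketch, not part of the original source): the byte sequences
 * the two emitters above produce for a couple of values.  The function name is made up.
 */
#if 0
static void iemDwarfLebExample(void)
{
    uint8_t    abBuf[4];
    RTPTRUNION Ptr = { &abBuf[0] };
    Ptr = iemDwarfPutUleb128(Ptr, 300);   /* 300 = 0x12c -> 0xac (low 7 bits + continuation bit), then 0x02. */
    Ptr = iemDwarfPutLeb128(Ptr, -8);     /* small negative value -> single byte 0x78.                       */
    Assert(abBuf[0] == 0xac && abBuf[1] == 0x02 && abBuf[2] == 0x78);
}
#endif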
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
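/* Editor's worked example (not part of the original source, assuming DWREG_AMD64_RBP is DWARF
   register 6): iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16) emits 0x0c 0x06 0x10
   (DW_CFA_def_cfa, ULEB128 register, ULEB128 offset), i.e. "CFA = RBP + 16". */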
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
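/* Editor's worked example (not part of the original source): with the CIE's data alignment
   factor of -8, iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits 0x86 0x02
   (DW_CFA_offset | register 6, then ULEB128(2)), i.e. "RBP is saved at [CFA + 2 * -8]". */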
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symbols */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on a 64 byte boundary, so the first time through
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
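        /* Editor's illustration (not part of the original source): a 100 byte request thus
           becomes RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes, so header (32) + user area (160)
           = 192 bytes, a multiple of 64, keeping the next user area 64 byte aligned as well. */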
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
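    /* Editor's illustration (not part of the original source): with cbMax = 64 MiB and the
       default cbChunk, the code above picks cbChunk = 64 MiB / 4 = 16 MiB (already a power of
       two), cbMax stays 64 MiB and cMaxChunks comes out as 4. */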
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
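
To make the chunk-size defaulting and rounding rules above concrete, here is a small self-contained sketch (not part of the original file) that re-implements just that arithmetic in plain C; the helper name pickDefaultChunkSize and the example values are purely illustrative.

#include <stdint.h>
#include <stdio.h>

/* Illustrative re-implementation of the default chunk sizing in iemExecMemAllocatorInit. */
static uint32_t pickDefaultChunkSize(uint64_t cbMax)
{
    uint64_t const cb1M = UINT64_C(1) << 20;
    uint32_t cbChunk;
    if (cbMax >= 256 * cb1M)
        cbChunk = (uint32_t)(64 * cb1M);
    else
    {
        if (cbMax < 16 * cb1M)
            cbChunk = cbMax >= 4 * cb1M ? (uint32_t)(4 * cb1M) : (uint32_t)cbMax;
        else
            cbChunk = (uint32_t)(cbMax / 4);
        if (cbChunk & (cbChunk - 1))            /* not a power of two: round up, like RT_BIT_32(ASMBitLastSetU32()) */
        {
            uint32_t cbPow2 = 1;
            while (cbPow2 < cbChunk)
                cbPow2 <<= 1;
            cbChunk = cbPow2;
        }
    }
    return cbChunk;
}

int main(void)
{
    uint64_t cbMax   = UINT64_C(40) << 20;      /* example: cbMax = 40 MiB, cbChunk given as 0 (default) */
    uint32_t cbChunk = pickDefaultChunkSize(cbMax);
    if (cbChunk > cbMax)
        cbMax = cbChunk;
    else
        cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;  /* round cbMax up to a whole number of chunks */
    printf("cbChunk=%u MiB cbMax=%u MiB cMaxChunks=%u\n",
           (unsigned)(cbChunk >> 20), (unsigned)(cbMax >> 20), (unsigned)(cbMax / cbChunk));
    /* Output: cbChunk=16 MiB cbMax=48 MiB cMaxChunks=3 */
    return 0;
}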
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#NM.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDeviceNotAvailableJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when it wants to raise a \#GP(0).
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1657{
1658 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1659#ifndef _MSC_VER
1660 return VINF_IEM_RAISED_XCPT; /* not reached */
1661#endif
1662}
1663
1664
1665/**
1666 * Used by TB code when it wants to raise a \#MF.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1669{
1670 iemRaiseMathFaultJmp(pVCpu);
1671#ifndef _MSC_VER
1672 return VINF_IEM_RAISED_XCPT; /* not reached */
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code when it wants to raise a \#XF.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1681{
1682 iemRaiseSimdFpExceptionJmp(pVCpu);
1683#ifndef _MSC_VER
1684 return VINF_IEM_RAISED_XCPT; /* not reached */
1685#endif
1686}
1687
1688
1689/**
1690 * Used by TB code when detecting opcode changes.
1691 * @see iemThreadeFuncWorkerObsoleteTb
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1694{
1695    /* We set fSafeToFree to false because we're being called in the context
1696       of a TB callback function, which for native TBs means we cannot release
1697       the executable memory until we've returned our way back to iemTbExec, as
1698       that return path goes via the native code generated for the TB. */
1699 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1700 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1701 return VINF_IEM_REEXEC_BREAK;
1702}
1703
1704
1705/**
1706 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1707 */
1708IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1709{
1710 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1711 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1712 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1713 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1714 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1715 return VINF_IEM_REEXEC_BREAK;
1716}
1717
1718
1719/**
1720 * Used by TB code when we missed a PC check after a branch.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1723{
1724 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1725 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1726 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1727 pVCpu->iem.s.pbInstrBuf));
1728 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1729 return VINF_IEM_REEXEC_BREAK;
1730}
1731
1732
1733
1734/*********************************************************************************************************************************
1735* Helpers: Segmented memory fetches and stores. *
1736*********************************************************************************************************************************/
1737
1738/**
1739 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1753 * to 16 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
1764
1765/**
1766 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1767 * to 32 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778/**
1779 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
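
The nested casts in the three sign-extending byte-fetch helpers above are easy to misread: the innermost cast reinterprets the fetched byte as signed, the middle cast sign-extends it to the requested width, and the outer unsigned casts ensure the value is zero-extended into the 64-bit return register beyond that width. A standalone illustration of the three cast chains (not part of the original file):

#include <stdint.h>
#include <assert.h>

int main(void)
{
    uint8_t const bValue = 0x80;   /* -128 when viewed as int8_t */

    /* Sign-extend to 16 bits, then zero-extend to 64 bits (mirrors the U8_Sx_U16 helper). */
    uint64_t const uSx16 = (uint64_t)(uint16_t)(int16_t)(int8_t)bValue;
    assert(uSx16 == UINT64_C(0x000000000000ff80));

    /* Sign-extend to 32 bits, then zero-extend to 64 bits (mirrors the U8_Sx_U32 helper). */
    uint64_t const uSx32 = (uint64_t)(uint32_t)(int32_t)(int8_t)bValue;
    assert(uSx32 == UINT64_C(0x00000000ffffff80));

    /* Sign-extend all the way to 64 bits (mirrors the U8_Sx_U64 helper). */
    uint64_t const uSx64 = (uint64_t)(int64_t)(int8_t)bValue;
    assert(uSx64 == UINT64_C(0xffffffffffffff80));

    return 0;
}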
1790
1791
1792/**
1793 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1807 * to 32 bits.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1821 * to 64 bits.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1826 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1827#else
1828 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1839 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1840#else
1841 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1848 * to 64 bits.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1853 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1854#else
1855 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1866 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1867#else
1868 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1869#endif
1870}
1871
1872
1873/**
1874 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1875 */
1876IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1877{
1878#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1879 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1880#else
1881 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1882#endif
1883}
1884
1885
1886/**
1887 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1888 */
1889IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1890{
1891#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1892 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1893#else
1894 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1895#endif
1896}
1897
1898
1899/**
1900 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1901 */
1902IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1903{
1904#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1905 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1906#else
1907 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1908#endif
1909}
1910
1911
1912/**
1913 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1914 */
1915IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1916{
1917#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1918 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1919#else
1920 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1921#endif
1922}
1923
1924
1925
1926/**
1927 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1928 */
1929IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1930{
1931#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1932 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1933#else
1934 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1935#endif
1936}
1937
1938
1939/**
1940 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1941 */
1942IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1943{
1944#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1945 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1946#else
1947 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1948#endif
1949}
1950
1951
1952/**
1953 * Used by TB code to store a 32-bit selector value onto a generic stack.
1954 *
1955 * Intel CPUs don't write a whole dword, thus the special function.
1956 */
1957IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1958{
1959#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1960 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1961#else
1962 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1963#endif
1964}
1965
1966
1967/**
1968 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1969 */
1970IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1971{
1972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1973 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1974#else
1975 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1976#endif
1977}
1978
1979
1980/**
1981 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1982 */
1983IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1984{
1985#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1986 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1987#else
1988 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1989#endif
1990}
1991
1992
1993/**
1994 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1999 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2000#else
2001 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2008 */
2009IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2010{
2011#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2012 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2013#else
2014 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2015#endif
2016}
2017
2018
2019
2020/*********************************************************************************************************************************
2021* Helpers: Flat memory fetches and stores. *
2022*********************************************************************************************************************************/
2023
2024/**
2025 * Used by TB code to load unsigned 8-bit data w/ flat address.
2026 * @note Zero extending the value to 64-bit to simplify assembly.
2027 */
2028IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2029{
2030#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2031 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2032#else
2033 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2034#endif
2035}
2036
2037
2038/**
2039 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2040 * to 16 bits.
2041 * @note Zero extending the value to 64-bit to simplify assembly.
2042 */
2043IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2044{
2045#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2046 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2047#else
2048 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2049#endif
2050}
2051
2052
2053/**
2054 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2055 * to 32 bits.
2056 * @note Zero extending the value to 64-bit to simplify assembly.
2057 */
2058IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2059{
2060#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2061 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2062#else
2063 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2064#endif
2065}
2066
2067
2068/**
2069 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2070 * to 64 bits.
2071 */
2072IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2073{
2074#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2075 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2076#else
2077 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2078#endif
2079}
2080
2081
2082/**
2083 * Used by TB code to load unsigned 16-bit data w/ flat address.
2084 * @note Zero extending the value to 64-bit to simplify assembly.
2085 */
2086IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2087{
2088#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2089 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2090#else
2091 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2092#endif
2093}
2094
2095
2096/**
2097 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2098 * to 32 bits.
2099 * @note Zero extending the value to 64-bit to simplify assembly.
2100 */
2101IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2102{
2103#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2104 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2105#else
2106 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2107#endif
2108}
2109
2110
2111/**
2112 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2113 * to 64 bits.
2114 * @note Zero extending the value to 64-bit to simplify assembly.
2115 */
2116IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2117{
2118#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2119 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2120#else
2121 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2122#endif
2123}
2124
2125
2126/**
2127 * Used by TB code to load unsigned 32-bit data w/ flat address.
2128 * @note Zero extending the value to 64-bit to simplify assembly.
2129 */
2130IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2131{
2132#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2133 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2134#else
2135 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2136#endif
2137}
2138
2139
2140/**
2141 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2142 * to 64 bits.
2143 * @note Zero extending the value to 64-bit to simplify assembly.
2144 */
2145IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2146{
2147#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2148 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2149#else
2150 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2151#endif
2152}
2153
2154
2155/**
2156 * Used by TB code to load unsigned 64-bit data w/ flat address.
2157 */
2158IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2159{
2160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2161 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2162#else
2163 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2164#endif
2165}
2166
2167
2168/**
2169 * Used by TB code to store unsigned 8-bit data w/ flat address.
2170 */
2171IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2172{
2173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2174 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2175#else
2176 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2177#endif
2178}
2179
2180
2181/**
2182 * Used by TB code to store unsigned 16-bit data w/ flat address.
2183 */
2184IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2185{
2186#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2187 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2188#else
2189 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2190#endif
2191}
2192
2193
2194/**
2195 * Used by TB code to store unsigned 32-bit data w/ flat address.
2196 */
2197IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2198{
2199#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2200 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2201#else
2202 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2203#endif
2204}
2205
2206
2207/**
2208 * Used by TB code to store unsigned 64-bit data w/ flat address.
2209 */
2210IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2211{
2212#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2213 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2214#else
2215 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2216#endif
2217}
2218
2219
2220
2221/**
2222 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2223 */
2224IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2225{
2226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2227 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2228#else
2229 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2230#endif
2231}
2232
2233
2234/**
2235 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2236 */
2237IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2238{
2239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2240 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2241#else
2242 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2243#endif
2244}
2245
2246
2247/**
2248 * Used by TB code to store a segment selector value onto a flat stack.
2249 *
2250 * Intel CPUs don't write a whole dword, thus the special function.
2251 */
2252IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2253{
2254#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2255 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2256#else
2257 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2258#endif
2259}
2260
2261
2262/**
2263 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2264 */
2265IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2266{
2267#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2268 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2269#else
2270 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2271#endif
2272}
2273
2274
2275/**
2276 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2277 */
2278IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2279{
2280#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2281 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2282#else
2283 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2284#endif
2285}
2286
2287
2288/**
2289 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2290 */
2291IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2292{
2293#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2294 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2295#else
2296 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2297#endif
2298}
2299
2300
2301/**
2302 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2303 */
2304IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2305{
2306#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2307 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2308#else
2309 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2310#endif
2311}
2312
2313
2314
2315/*********************************************************************************************************************************
2316* Helpers: Segmented memory mapping. *
2317*********************************************************************************************************************************/
2318
2319/**
2320 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2321 * segmentation.
2322 */
2323IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2324 RTGCPTR GCPtrMem, uint8_t iSegReg))
2325{
2326#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2327 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2328#else
2329 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2330#endif
2331}
2332
2333
2334/**
2335 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2336 */
2337IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2338 RTGCPTR GCPtrMem, uint8_t iSegReg))
2339{
2340#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2341 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2342#else
2343 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2344#endif
2345}
2346
2347
2348/**
2349 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2350 */
2351IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2352 RTGCPTR GCPtrMem, uint8_t iSegReg))
2353{
2354#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2355 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2356#else
2357 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2358#endif
2359}
2360
2361
2362/**
2363 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2364 */
2365IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2366 RTGCPTR GCPtrMem, uint8_t iSegReg))
2367{
2368#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2369 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2370#else
2371 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2372#endif
2373}
2374
2375
2376/**
2377 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2378 * segmentation.
2379 */
2380IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2381 RTGCPTR GCPtrMem, uint8_t iSegReg))
2382{
2383#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2384 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2385#else
2386 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2387#endif
2388}
2389
2390
2391/**
2392 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2393 */
2394IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2395 RTGCPTR GCPtrMem, uint8_t iSegReg))
2396{
2397#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2398 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2399#else
2400 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2401#endif
2402}
2403
2404
2405/**
2406 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2407 */
2408IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2409 RTGCPTR GCPtrMem, uint8_t iSegReg))
2410{
2411#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2412 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2413#else
2414 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2415#endif
2416}
2417
2418
2419/**
2420 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2421 */
2422IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2423 RTGCPTR GCPtrMem, uint8_t iSegReg))
2424{
2425#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2426 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2427#else
2428 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2429#endif
2430}
2431
2432
2433/**
2434 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2435 * segmentation.
2436 */
2437IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2438 RTGCPTR GCPtrMem, uint8_t iSegReg))
2439{
2440#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2441 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2442#else
2443 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2444#endif
2445}
2446
2447
2448/**
2449 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2450 */
2451IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2452 RTGCPTR GCPtrMem, uint8_t iSegReg))
2453{
2454#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2455 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2456#else
2457 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2458#endif
2459}
2460
2461
2462/**
2463 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2464 */
2465IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2466 RTGCPTR GCPtrMem, uint8_t iSegReg))
2467{
2468#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2469 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2470#else
2471 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2472#endif
2473}
2474
2475
2476/**
2477 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2478 */
2479IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2480 RTGCPTR GCPtrMem, uint8_t iSegReg))
2481{
2482#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2483 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2484#else
2485 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2486#endif
2487}
2488
2489
2490/**
2491 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2492 * segmentation.
2493 */
2494IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2495 RTGCPTR GCPtrMem, uint8_t iSegReg))
2496{
2497#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2498 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2499#else
2500 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2501#endif
2502}
2503
2504
2505/**
2506 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2507 */
2508IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2509 RTGCPTR GCPtrMem, uint8_t iSegReg))
2510{
2511#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2512 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2513#else
2514 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2515#endif
2516}
2517
2518
2519/**
2520 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2521 */
2522IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2523 RTGCPTR GCPtrMem, uint8_t iSegReg))
2524{
2525#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2526 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2527#else
2528 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2529#endif
2530}
2531
2532
2533/**
2534 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2535 */
2536IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2537 RTGCPTR GCPtrMem, uint8_t iSegReg))
2538{
2539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2540 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2541#else
2542 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2543#endif
2544}
2545
2546
2547/**
2548 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2551 RTGCPTR GCPtrMem, uint8_t iSegReg))
2552{
2553#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2554 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2555#else
2556 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2557#endif
2558}
2559
2560
2561/**
2562 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2563 */
2564IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2565 RTGCPTR GCPtrMem, uint8_t iSegReg))
2566{
2567#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2568 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2569#else
2570 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2571#endif
2572}
2573
2574
2575/**
2576 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2577 * segmentation.
2578 */
2579IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2580 RTGCPTR GCPtrMem, uint8_t iSegReg))
2581{
2582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2583 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2584#else
2585 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2586#endif
2587}
2588
2589
2590/**
2591 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2592 */
2593IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2594 RTGCPTR GCPtrMem, uint8_t iSegReg))
2595{
2596#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2597 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2598#else
2599 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2600#endif
2601}
2602
2603
2604/**
2605 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2606 */
2607IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2608 RTGCPTR GCPtrMem, uint8_t iSegReg))
2609{
2610#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2611 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2612#else
2613 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2614#endif
2615}
2616
2617
2618/**
2619 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2620 */
2621IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2622 RTGCPTR GCPtrMem, uint8_t iSegReg))
2623{
2624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2625 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2626#else
2627 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2628#endif
2629}
2630
2631
2632/*********************************************************************************************************************************
2633* Helpers: Flat memory mapping. *
2634*********************************************************************************************************************************/
2635
2636/**
2637 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2638 * address.
2639 */
2640IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2644#else
2645 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2654{
2655#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2656 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2657#else
2658 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2659#endif
2660}
2661
2662
2663/**
2664 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2665 */
2666IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2667{
2668#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2669 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2670#else
2671 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2672#endif
2673}
2674
2675
2676/**
2677 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2678 */
2679IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2680{
2681#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2682 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2683#else
2684 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2685#endif
2686}
2687
2688
2689/**
2690 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2691 * address.
2692 */
2693IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2694{
2695#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2696 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2697#else
2698 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2699#endif
2700}
2701
2702
2703/**
2704 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2705 */
2706IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2707{
2708#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2709 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2710#else
2711 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2712#endif
2713}
2714
2715
2716/**
2717 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2718 */
2719IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2720{
2721#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2722 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2723#else
2724 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2725#endif
2726}
2727
2728
2729/**
2730 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2731 */
2732IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2733{
2734#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2735 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2736#else
2737 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2738#endif
2739}
2740
2741
2742/**
2743 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2744 * address.
2745 */
2746IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2747{
2748#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2749 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2750#else
2751 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2752#endif
2753}
2754
2755
2756/**
2757 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2758 */
2759IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2760{
2761#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2762 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2763#else
2764 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2765#endif
2766}
2767
2768
2769/**
2770 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2771 */
2772IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2773{
2774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2775 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2776#else
2777 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2778#endif
2779}
2780
2781
2782/**
2783 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2784 */
2785IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2786{
2787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2788 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2789#else
2790 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2791#endif
2792}
2793
2794
2795/**
2796 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2797 * address.
2798 */
2799IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2800{
2801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2802 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2803#else
2804 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2805#endif
2806}
2807
2808
2809/**
2810 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2811 */
2812IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2813{
2814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2815 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2816#else
2817 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2818#endif
2819}
2820
2821
2822/**
2823 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2824 */
2825IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2826{
2827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2828 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2829#else
2830 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2831#endif
2832}
2833
2834
2835/**
2836 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2839{
2840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2841 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2842#else
2843 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2844#endif
2845}
2846
2847
2848/**
2849 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2850 */
2851IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2852{
2853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2854 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2855#else
2856 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2857#endif
2858}
2859
2860
2861/**
2862 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2863 */
2864IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2865{
2866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2867 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2868#else
2869 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2870#endif
2871}
2872
2873
2874/**
2875 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2876 * address.
2877 */
2878IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2879{
2880#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2881 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2882#else
2883 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2884#endif
2885}
2886
2887
2888/**
2889 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2890 */
2891IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2892{
2893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2894 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2895#else
2896 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2897#endif
2898}
2899
2900
2901/**
2902 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2903 */
2904IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2905{
2906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2907 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2908#else
2909 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2910#endif
2911}
2912
2913
2914/**
2915 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2916 */
2917IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2918{
2919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2920 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2921#else
2922 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2923#endif
2924}
2925
2926
2927/*********************************************************************************************************************************
2928* Helpers: Commit, rollback & unmap *
2929*********************************************************************************************************************************/
2930
2931/**
2932 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2933 */
2934IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2935{
2936 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2937}
2938
2939
2940/**
2941 * Used by TB code to commit and unmap a read-write memory mapping.
2942 */
2943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2944{
2945 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2946}
2947
2948
2949/**
2950 * Used by TB code to commit and unmap a write-only memory mapping.
2951 */
2952IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2953{
2954 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2955}
2956
2957
2958/**
2959 * Used by TB code to commit and unmap a read-only memory mapping.
2960 */
2961IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2962{
2963 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2964}
2965
2966
2967/**
2968 * Reinitializes the native recompiler state.
2969 *
2970 * Called before starting a new recompile job.
2971 */
2972static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2973{
2974 pReNative->cLabels = 0;
2975 pReNative->bmLabelTypes = 0;
2976 pReNative->cFixups = 0;
2977#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2978 pReNative->pDbgInfo->cEntries = 0;
2979#endif
2980 pReNative->pTbOrg = pTb;
2981 pReNative->cCondDepth = 0;
2982 pReNative->uCondSeqNo = 0;
2983 pReNative->uCheckIrqSeqNo = 0;
2984 pReNative->uTlbSeqNo = 0;
2985
2986#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2987 pReNative->Core.offPc = 0;
2988 pReNative->Core.cInstrPcUpdateSkipped = 0;
2989#endif
2990#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2991 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2992#endif
2993 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2994#if IEMNATIVE_HST_GREG_COUNT < 32
2995 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2996#endif
2997 ;
2998 pReNative->Core.bmHstRegsWithGstShadow = 0;
2999 pReNative->Core.bmGstRegShadows = 0;
3000 pReNative->Core.bmVars = 0;
3001 pReNative->Core.bmStack = 0;
3002 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3003 pReNative->Core.u64ArgVars = UINT64_MAX;
3004
3005 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 16);
3006 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3007 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3008 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3009 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3010 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3011 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3012 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3013 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3014 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3015 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3016 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3017 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3018 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3019 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3020 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3021 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3022
3023 /* Full host register reinit: */
3024 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3025 {
3026 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3027 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3028 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3029 }
3030
3031 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3032 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3033#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3034 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3035#endif
3036#ifdef IEMNATIVE_REG_FIXED_TMP0
3037 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3038#endif
3039#ifdef IEMNATIVE_REG_FIXED_TMP1
3040 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3041#endif
3042#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3043 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3044#endif
3045 );
3046 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3047 {
3048 fRegs &= ~RT_BIT_32(idxReg);
3049        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3050 }
3051
3052 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3053#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3054 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3055#endif
3056#ifdef IEMNATIVE_REG_FIXED_TMP0
3057 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3058#endif
3059#ifdef IEMNATIVE_REG_FIXED_TMP1
3060 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3061#endif
3062#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3063 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3064#endif
3065
3066#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3067# ifdef RT_ARCH_ARM64
3068    /*
3069     * Arm64 only has 32 128-bit registers. In order to support emulating 256-bit registers we statically
3070     * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3071     * We always pair v0 with v1, v2 with v3, etc., so we mark the higher (odd) register as fixed here during
3072     * init and the register allocator assumes that it will always be free when the lower one is picked.
3073     */
3074 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3075# else
3076 uint32_t const fFixedAdditional = 0;
3077# endif
3078
3079 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3080 | fFixedAdditional
3081# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3082 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3083# endif
3084 ;
3085 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3086 pReNative->Core.bmGstSimdRegShadows = 0;
3087 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3088 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3089
3090 /* Full host register reinit: */
3091 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3092 {
3093 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3094 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3095 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3096 }
3097
3098 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3099 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3100 {
3101 fRegs &= ~RT_BIT_32(idxReg);
3102 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3103 }
3104
3105#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3106 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3107#endif
3108
3109#endif
3110
3111 return pReNative;
3112}
3113
3114
3115/**
3116 * Allocates and initializes the native recompiler state.
3117 *
3118 * This is called the first time an EMT wants to recompile something.
3119 *
3120 * @returns Pointer to the new recompiler state.
3121 * @param pVCpu The cross context virtual CPU structure of the calling
3122 * thread.
3123 * @param pTb The TB that's about to be recompiled.
3124 * @thread EMT(pVCpu)
3125 */
3126static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3127{
3128 VMCPU_ASSERT_EMT(pVCpu);
3129
3130 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3131 AssertReturn(pReNative, NULL);
3132
3133 /*
3134 * Try allocate all the buffers and stuff we need.
3135 */
3136 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3137 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3138 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3139#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3140 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3141#endif
3142 if (RT_LIKELY( pReNative->pInstrBuf
3143 && pReNative->paLabels
3144 && pReNative->paFixups)
3145#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3146 && pReNative->pDbgInfo
3147#endif
3148 )
3149 {
3150 /*
3151 * Set the buffer & array sizes on success.
3152 */
3153 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3154 pReNative->cLabelsAlloc = _8K;
3155 pReNative->cFixupsAlloc = _16K;
3156#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3157 pReNative->cDbgInfoAlloc = _16K;
3158#endif
3159
3160 /* Other constant stuff: */
3161 pReNative->pVCpu = pVCpu;
3162
3163 /*
3164 * Done, just need to save it and reinit it.
3165 */
3166 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3167 return iemNativeReInit(pReNative, pTb);
3168 }
3169
3170 /*
3171 * Failed. Cleanup and return.
3172 */
3173 AssertFailed();
3174 RTMemFree(pReNative->pInstrBuf);
3175 RTMemFree(pReNative->paLabels);
3176 RTMemFree(pReNative->paFixups);
3177#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3178 RTMemFree(pReNative->pDbgInfo);
3179#endif
3180 RTMemFree(pReNative);
3181 return NULL;
3182}
3183
3184
3185/**
3186 * Creates a label
3187 *
3188 * If the label does not yet have a defined position,
3189 * call iemNativeLabelDefine() later to set it.
3190 *
3191 * @returns Label ID. Throws VBox status code on failure, so no need to check
3192 * the return value.
3193 * @param pReNative The native recompile state.
3194 * @param enmType The label type.
3195 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3196 * label is not yet defined (default).
3197 * @param uData Data associated with the label. Only applicable to
3198 * certain types of labels. Default is zero.
3199 */
3200DECL_HIDDEN_THROW(uint32_t)
3201iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3202 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3203{
3204 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3205
3206 /*
3207 * Locate existing label definition.
3208 *
3209 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3210 * and uData is zero.
3211 */
3212 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3213 uint32_t const cLabels = pReNative->cLabels;
3214 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3215#ifndef VBOX_STRICT
3216 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3217 && offWhere == UINT32_MAX
3218 && uData == 0
3219#endif
3220 )
3221 {
3222#ifndef VBOX_STRICT
3223 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3224 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3225 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3226 if (idxLabel < pReNative->cLabels)
3227 return idxLabel;
3228#else
3229 for (uint32_t i = 0; i < cLabels; i++)
3230 if ( paLabels[i].enmType == enmType
3231 && paLabels[i].uData == uData)
3232 {
3233 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3234 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3235 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3236 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3237 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3238 return i;
3239 }
3240 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3241 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3242#endif
3243 }
3244
3245 /*
3246 * Make sure we've got room for another label.
3247 */
3248 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3249 { /* likely */ }
3250 else
3251 {
3252 uint32_t cNew = pReNative->cLabelsAlloc;
3253 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3254 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3255 cNew *= 2;
3256 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3257 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3258 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3259 pReNative->paLabels = paLabels;
3260 pReNative->cLabelsAlloc = cNew;
3261 }
3262
3263 /*
3264 * Define a new label.
3265 */
3266 paLabels[cLabels].off = offWhere;
3267 paLabels[cLabels].enmType = enmType;
3268 paLabels[cLabels].uData = uData;
3269 pReNative->cLabels = cLabels + 1;
3270
3271 Assert((unsigned)enmType < 64);
3272 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3273
3274 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3275 {
3276 Assert(uData == 0);
3277 pReNative->aidxUniqueLabels[enmType] = cLabels;
3278 }
3279
3280 if (offWhere != UINT32_MAX)
3281 {
3282#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3283 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3284 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3285#endif
3286 }
3287 return cLabels;
3288}
3289
3290
3291/**
3292 * Defines the location of an existing label.
3293 *
3294 * @param pReNative The native recompile state.
3295 * @param idxLabel The label to define.
3296 * @param offWhere The position.
3297 */
3298DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3299{
3300 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3301 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3302 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3303 pLabel->off = offWhere;
3304#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3305 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3306 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3307#endif
3308}
3309
3310
3311/**
3312 * Looks up a label.
3313 *
3314 * @returns Label ID if found, UINT32_MAX if not.
3315 */
3316static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3317 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3318{
3319 Assert((unsigned)enmType < 64);
3320 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3321 {
3322 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3323 return pReNative->aidxUniqueLabels[enmType];
3324
3325 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3326 uint32_t const cLabels = pReNative->cLabels;
3327 for (uint32_t i = 0; i < cLabels; i++)
3328 if ( paLabels[i].enmType == enmType
3329 && paLabels[i].uData == uData
3330 && ( paLabels[i].off == offWhere
3331 || offWhere == UINT32_MAX
3332 || paLabels[i].off == UINT32_MAX))
3333 return i;
3334 }
3335 return UINT32_MAX;
3336}
3337
3338
3339/**
3340 * Adds a fixup.
3341 *
3342 * @throws VBox status code (int) on failure.
3343 * @param pReNative The native recompile state.
3344 * @param offWhere The instruction offset of the fixup location.
3345 * @param idxLabel The target label ID for the fixup.
3346 * @param enmType The fixup type.
3347 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3348 */
3349DECL_HIDDEN_THROW(void)
3350iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3351 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3352{
3353 Assert(idxLabel <= UINT16_MAX);
3354 Assert((unsigned)enmType <= UINT8_MAX);
3355#ifdef RT_ARCH_ARM64
3356 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3357 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3358 || pReNative->paLabels[idxLabel].off == UINT32_MAX,
3359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3360#endif
3361
3362 /*
3363 * Make sure we've got room for another fixup.
3364 */
3365 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3366 uint32_t const cFixups = pReNative->cFixups;
3367 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3368 { /* likely */ }
3369 else
3370 {
3371 uint32_t cNew = pReNative->cFixupsAlloc;
3372 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3373 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3374 cNew *= 2;
3375 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3376 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3377 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3378 pReNative->paFixups = paFixups;
3379 pReNative->cFixupsAlloc = cNew;
3380 }
3381
3382 /*
3383 * Add the fixup.
3384 */
3385 paFixups[cFixups].off = offWhere;
3386 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3387 paFixups[cFixups].enmType = enmType;
3388 paFixups[cFixups].offAddend = offAddend;
3389 pReNative->cFixups = cFixups + 1;
3390}
3391
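/*
 * Illustrative usage sketch of the three helpers above (a non-compiled example;
 * the label type, fixup type and addend named here are placeholders, the right
 * choice depends on the host architecture and the branch being emitted): a
 * label is created without a position, a fixup is recorded at the branch that
 * targets it, and the label is defined once the target offset is known.
 */
#if 0 /* example only, not compiled */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else); /* offWhere defaults to UINT32_MAX */
    /* ... emit the branch at 'off' and remember that it needs patching ... */
    iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4 /* example addend for an AMD64 rel32 */);
    /* ... emit the code the branch jumps over ... */
    iemNativeLabelDefine(pReNative, idxLabel, off); /* resolves the forward reference */
#endif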
3392
3393/**
3394 * Slow code path for iemNativeInstrBufEnsure.
3395 */
3396DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3397{
3398 /* Double the buffer size till we meet the request. */
3399 uint32_t cNew = pReNative->cInstrBufAlloc;
3400 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3401 do
3402 cNew *= 2;
3403 while (cNew < off + cInstrReq);
3404
3405 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3406#ifdef RT_ARCH_ARM64
3407 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3408#else
3409 uint32_t const cbMaxInstrBuf = _2M;
3410#endif
3411 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3412
3413 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3414 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3415
3416#ifdef VBOX_STRICT
3417 pReNative->offInstrBufChecked = off + cInstrReq;
3418#endif
3419 pReNative->cInstrBufAlloc = cNew;
3420 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3421}
3422
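/*
 * Illustrative sketch: emitters normally go through the inline fast path
 * iemNativeInstrBufEnsure() and only end up in the slow path above when the
 * buffer must grow.  The wrapper signature is assumed here to mirror the slow
 * path (state, current offset, number of instruction units required).
 */
#if 0 /* example only, not compiled */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1 /*cInstrReq*/);
    pCodeBuf[off++] = 0x90; /* e.g. a NOP byte on AMD64; on ARM64 each unit is a 32-bit instruction word */
#endif
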
3423#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3424
3425/**
3426 * Grows the static debug info array used during recompilation.
3427 *
3428 * @returns Pointer to the new debug info block; throws VBox status code on
3429 * failure, so no need to check the return value.
3430 */
3431DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3432{
3433 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3434 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3435 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3436 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3437 pReNative->pDbgInfo = pDbgInfo;
3438 pReNative->cDbgInfoAlloc = cNew;
3439 return pDbgInfo;
3440}
3441
3442
3443/**
3444 * Adds a new debug info uninitialized entry, returning the pointer to it.
3445 */
3446DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3447{
3448 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3449 { /* likely */ }
3450 else
3451 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3452 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3453}
3454
3455
3456/**
3457 * Debug Info: Adds a native offset record, if necessary.
3458 */
3459DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3460{
3461 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3462
3463 /*
3464 * Search backwards to see if we've got a similar record already.
3465 */
3466 uint32_t idx = pDbgInfo->cEntries;
3467 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3468 while (idx-- > idxStop)
3469 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3470 {
3471 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3472 return;
3473 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3474 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3475 break;
3476 }
3477
3478 /*
3479 * Add it.
3480 */
3481 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3482 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3483 pEntry->NativeOffset.offNative = off;
3484}
3485
3486
3487/**
3488 * Debug Info: Record info about a label.
3489 */
3490static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3491{
3492 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3493 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3494 pEntry->Label.uUnused = 0;
3495 pEntry->Label.enmLabel = (uint8_t)enmType;
3496 pEntry->Label.uData = uData;
3497}
3498
3499
3500/**
3501 * Debug Info: Record info about a threaded call.
3502 */
3503static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3504{
3505 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3506 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3507 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3508 pEntry->ThreadedCall.uUnused = 0;
3509 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3510}
3511
3512
3513/**
3514 * Debug Info: Record info about a new guest instruction.
3515 */
3516static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3517{
3518 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3519 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3520 pEntry->GuestInstruction.uUnused = 0;
3521 pEntry->GuestInstruction.fExec = fExec;
3522}
3523
3524
3525/**
3526 * Debug Info: Record info about guest register shadowing.
3527 */
3528DECL_HIDDEN_THROW(void)
3529iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3530 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3531{
3532 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3533 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3534 pEntry->GuestRegShadowing.uUnused = 0;
3535 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3536 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3537 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3538}
3539
3540
3541# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3542/**
3543 * Debug Info: Record info about guest SIMD register shadowing.
3544 */
3545DECL_HIDDEN_THROW(void)
3546iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3547 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3548{
3549 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3550 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3551 pEntry->GuestSimdRegShadowing.uUnused = 0;
3552 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3553 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3554 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3555}
3556# endif
3557
3558
3559# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3560/**
3561 * Debug Info: Record info about delayed RIP updates.
3562 */
3563DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3564{
3565 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3566 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3567 pEntry->DelayedPcUpdate.offPc = offPc;
3568 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3569}
3570# endif
3571
3572#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3573
3574
3575/*********************************************************************************************************************************
3576* Register Allocator *
3577*********************************************************************************************************************************/
3578
3579/**
3580 * Register parameter indexes (indexed by argument number).
3581 */
3582DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3583{
3584 IEMNATIVE_CALL_ARG0_GREG,
3585 IEMNATIVE_CALL_ARG1_GREG,
3586 IEMNATIVE_CALL_ARG2_GREG,
3587 IEMNATIVE_CALL_ARG3_GREG,
3588#if defined(IEMNATIVE_CALL_ARG4_GREG)
3589 IEMNATIVE_CALL_ARG4_GREG,
3590# if defined(IEMNATIVE_CALL_ARG5_GREG)
3591 IEMNATIVE_CALL_ARG5_GREG,
3592# if defined(IEMNATIVE_CALL_ARG6_GREG)
3593 IEMNATIVE_CALL_ARG6_GREG,
3594# if defined(IEMNATIVE_CALL_ARG7_GREG)
3595 IEMNATIVE_CALL_ARG7_GREG,
3596# endif
3597# endif
3598# endif
3599#endif
3600};
3601AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3602
3603/**
3604 * Call register masks indexed by argument count.
3605 */
3606DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3607{
3608 0,
3609 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3610 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3611 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3612 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3613 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3614#if defined(IEMNATIVE_CALL_ARG4_GREG)
3615 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3616 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3617# if defined(IEMNATIVE_CALL_ARG5_GREG)
3618 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3619 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3620# if defined(IEMNATIVE_CALL_ARG6_GREG)
3621 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3622 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3623 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3624# if defined(IEMNATIVE_CALL_ARG7_GREG)
3625 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3626 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3627 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3628# endif
3629# endif
3630# endif
3631#endif
3632};
3633
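/*
 * Illustrative sketch: the two tables above map argument numbers to host
 * registers and argument counts to register masks.  The busy check below
 * mirrors the one done by iemNativeRegAllocArgs() later in this file.
 */
#if 0 /* example only, not compiled */
    uint8_t const  idxRegArg2 = g_aidxIemNativeCallRegs[2]; /* host GPR carrying the third argument */
    uint32_t const fArgMask   = g_afIemNativeCallRegs[3];   /* ARG0 | ARG1 | ARG2 */
    if ((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & fArgMask)
    { /* at least one argument register is allocated or shadowing guest state and must be flushed first */ }
#endif
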
3634#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3635/**
3636 * BP offset of the stack argument slots.
3637 *
3638 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3639 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3640 */
3641DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3642{
3643 IEMNATIVE_FP_OFF_STACK_ARG0,
3644# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3645 IEMNATIVE_FP_OFF_STACK_ARG1,
3646# endif
3647# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3648 IEMNATIVE_FP_OFF_STACK_ARG2,
3649# endif
3650# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3651 IEMNATIVE_FP_OFF_STACK_ARG3,
3652# endif
3653};
3654AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3655#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3656
3657/**
3658 * Info about shadowed guest register values.
3659 * @see IEMNATIVEGSTREG
3660 */
3661DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3662{
3663#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3664 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3665 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3666 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3667 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3668 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3669 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3670 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3671 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3672 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3673 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3674 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3675 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3676 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3677 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3678 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3679 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3680 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3681 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3682 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3683 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3684 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3685 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3686 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3687 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3688 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3689 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3690 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3691 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3692 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3693 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3694 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3695 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3696 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3697 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3698 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3699 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3700 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3701 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3702 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3703 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3704 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3705 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3706 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3707 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3708 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3709 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3710 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3711 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3712#undef CPUMCTX_OFF_AND_SIZE
3713};
3714AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3715
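/*
 * Illustrative sketch: each entry pairs the CPUMCTX offset and size of a
 * shadowed guest register (via CPUMCTX_OFF_AND_SIZE above) with a short name
 * used for logging and assertions, e.g.:
 */
#if 0 /* example only, not compiled */
    IEMNATIVEGSTREG const enmGstReg = kIemNativeGstReg_Pc;
    Log12(("guest %s is %u byte(s) wide in CPUMCTX\n",
           g_aGstShadowInfo[enmGstReg].pszName, (unsigned)g_aGstShadowInfo[enmGstReg].cb));
#endif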
3716
3717/** Host CPU general purpose register names. */
3718DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3719{
3720#ifdef RT_ARCH_AMD64
3721 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3722#elif defined(RT_ARCH_ARM64)
3723 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3724 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3725#else
3726# error "port me"
3727#endif
3728};
3729
3730
3731#if 0 /* unused */
3732/**
3733 * Tries to locate a suitable register in the given register mask.
3734 *
3735 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3736 * failed.
3737 *
3738 * @returns Host register number on success, returns UINT8_MAX on failure.
3739 */
3740static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3741{
3742 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3743 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3744 if (fRegs)
3745 {
3746 /** @todo pick better here: */
3747 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3748
3749 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3750 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3751 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3752 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3753
3754 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3755 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3756 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3757 return idxReg;
3758 }
3759 return UINT8_MAX;
3760}
3761#endif /* unused */
3762
3763
3764/**
3765 * Locate a register, possibly freeing one up.
3766 *
3767 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3768 * failed.
3769 *
3770 * @returns Host register number on success. Returns UINT8_MAX if no registers
3771 * found, the caller is supposed to deal with this and raise an
3772 * allocation type specific status code (if desired).
3773 *
3774 * @throws VBox status code if we run into trouble spilling a variable or
3775 * recording debug info. Does NOT throw anything if we're out of
3776 * registers, though.
3777 */
3778static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3779 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3780{
3781 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3782 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3783 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3784
3785 /*
3786 * Try a freed register that's shadowing a guest register.
3787 */
3788 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3789 if (fRegs)
3790 {
3791 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3792
3793#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3794 /*
3795 * When we have liveness information, we use it to kick out all shadowed
3796 * guest registers that will not be needed any more in this TB. If we're
3797 * lucky, this may prevent us from ending up here again.
3798 *
3799 * Note! We must consider the previous entry here so we don't free
3800 * anything that the current threaded function requires (current
3801 * entry is produced by the next threaded function).
3802 */
3803 uint32_t const idxCurCall = pReNative->idxCurCall;
3804 if (idxCurCall > 0)
3805 {
3806 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3807
3808# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3809 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3810 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3811 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3812# else
3813 /* Construct a mask of the registers not in the read or write state.
3814 Note! We could skip writes, if they aren't from us, as this is just
3815 a hack to prevent trashing registers that have just been written
3816 or will be written when we retire the current instruction. */
3817 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3818 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3819 & IEMLIVENESSBIT_MASK;
3820# endif
3821 /* Merge EFLAGS. */
3822 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3823 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3824 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3825 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3826 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3827
3828 /* If it matches any shadowed registers. */
3829 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3830 {
3831 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3832 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3833 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3834
3835 /* See if we've got any unshadowed registers we can return now. */
3836 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3837 if (fUnshadowedRegs)
3838 {
3839 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3840 return (fPreferVolatile
3841 ? ASMBitFirstSetU32(fUnshadowedRegs)
3842 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3843 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3844 - 1;
3845 }
3846 }
3847 }
3848#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3849
3850 unsigned const idxReg = (fPreferVolatile
3851 ? ASMBitFirstSetU32(fRegs)
3852 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3853 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3854 - 1;
3855
3856 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3857 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3858 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3859 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3860
3861 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3862 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3863 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3864 return idxReg;
3865 }
3866
3867 /*
3868 * Try free up a variable that's in a register.
3869 *
3870 * We do two rounds here: first we evacuate variables that don't need to be
3871 * saved on the stack, then in the second round we move things to the stack.
3872 */
3873 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3874 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3875 {
3876 uint32_t fVars = pReNative->Core.bmVars;
3877 while (fVars)
3878 {
3879 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3880 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3881 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3882 && (RT_BIT_32(idxReg) & fRegMask)
3883 && ( iLoop == 0
3884 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3885 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3886 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3887 {
3888 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3889 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3890 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3891 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3892 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3893 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3894
3895 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3896 {
3897 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3898 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3899 }
3900
3901 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3902 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3903
3904 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3905 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3906 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3907 return idxReg;
3908 }
3909 fVars &= ~RT_BIT_32(idxVar);
3910 }
3911 }
3912
3913 return UINT8_MAX;
3914}
3915
3916
3917/**
3918 * Reassigns a variable to a different register specified by the caller.
3919 *
3920 * @returns The new code buffer position.
3921 * @param pReNative The native recompile state.
3922 * @param off The current code buffer position.
3923 * @param idxVar The variable index.
3924 * @param idxRegOld The old host register number.
3925 * @param idxRegNew The new host register number.
3926 * @param pszCaller The caller for logging.
3927 */
3928static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3929 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3930{
3931 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3932 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3933 RT_NOREF(pszCaller);
3934
3935 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3936
3937 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3938 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3939 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3940 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3941
3942 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3943 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3944 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3945 if (fGstRegShadows)
3946 {
3947 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3948 | RT_BIT_32(idxRegNew);
3949 while (fGstRegShadows)
3950 {
3951 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3952 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3953
3954 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3955 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3956 }
3957 }
3958
3959 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3960 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3961 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3962 return off;
3963}
3964
3965
3966/**
3967 * Moves a variable to a different register or spills it onto the stack.
3968 *
3969 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3970 * kinds can easily be recreated if needed later.
3971 *
3972 * @returns The new code buffer position.
3973 * @param pReNative The native recompile state.
3974 * @param off The current code buffer position.
3975 * @param idxVar The variable index.
3976 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3977 * call-volatile registers.
3978 */
3979DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3980 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3981{
3982 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3983 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3984 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3985 Assert(!pVar->fRegAcquired);
3986
3987 uint8_t const idxRegOld = pVar->idxReg;
3988 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3989 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3990 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3991 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3992 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3993 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3994 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3995 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3996
3997
3998 /** @todo Add statistics on this.*/
3999 /** @todo Implement basic variable liveness analysis (python) so variables
4000 * can be freed immediately once no longer used. Without that we risk
4001 * trashing registers and stack for dead variables.
4002 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4003
4004 /*
4005 * First try move it to a different register, as that's cheaper.
4006 */
4007 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4008 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4009 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4010 if (fRegs)
4011 {
4012 /* Avoid using shadow registers, if possible. */
4013 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4014 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4015 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4016 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4017 }
4018
4019 /*
4020 * Otherwise we must spill the register onto the stack.
4021 */
4022 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4023 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4024 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4025 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4026
4027 pVar->idxReg = UINT8_MAX;
4028 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4029 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4030 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4031 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4032 return off;
4033}
4034
4035
4036/**
4037 * Allocates a temporary host general purpose register.
4038 *
4039 * This may emit code to save register content onto the stack in order to free
4040 * up a register.
4041 *
4042 * @returns The host register number; throws VBox status code on failure,
4043 * so no need to check the return value.
4044 * @param pReNative The native recompile state.
4045 * @param poff Pointer to the variable with the code buffer position.
4046 * This will be updated if we need to move a variable from
4047 * register to stack in order to satisfy the request.
4048 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4049 * registers (@c true, default) or the other way around
4050 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4051 */
4052DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4053{
4054 /*
4055 * Try find a completely unused register, preferably a call-volatile one.
4056 */
4057 uint8_t idxReg;
4058 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4059 & ~pReNative->Core.bmHstRegsWithGstShadow
4060 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4061 if (fRegs)
4062 {
4063 if (fPreferVolatile)
4064 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4065 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4066 else
4067 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4068 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4069 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4070 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4071 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4072 }
4073 else
4074 {
4075 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4076 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4077 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4078 }
4079 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4080}
4081
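/*
 * Illustrative usage sketch: allocate a scratch GPR for a few emitted
 * instructions and release it again.  iemNativeRegFreeTmp() is assumed here to
 * be the matching release helper (cf. iemNativeRegFreeTmpImm named in the
 * documentation of the immediate variant below).
 */
#if 0 /* example only, not compiled */
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    /* ... emit code using idxTmpReg as scratch ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg); /* assumed release helper */
#endif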
4082
4083/**
4084 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4085 * registers.
4086 *
4087 * @returns The host register number; throws VBox status code on failure,
4088 * so no need to check the return value.
4089 * @param pReNative The native recompile state.
4090 * @param poff Pointer to the variable with the code buffer position.
4091 * This will be updated if we need to move a variable from
4092 * register to stack in order to satisfy the request.
4093 * @param fRegMask Mask of acceptable registers.
4094 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4095 * registers (@c true, default) or the other way around
4096 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4097 */
4098DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4099 bool fPreferVolatile /*= true*/)
4100{
4101 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4102 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4103
4104 /*
4105 * Try find a completely unused register, preferably a call-volatile one.
4106 */
4107 uint8_t idxReg;
4108 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4109 & ~pReNative->Core.bmHstRegsWithGstShadow
4110 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4111 & fRegMask;
4112 if (fRegs)
4113 {
4114 if (fPreferVolatile)
4115 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4116 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4117 else
4118 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4119 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4120 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4121 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4122 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4123 }
4124 else
4125 {
4126 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4127 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4128 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4129 }
4130 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4131}
4132
4133
4134/**
4135 * Allocates a temporary register for loading an immediate value into.
4136 *
4137 * This will emit code to load the immediate, unless there happens to be an
4138 * unused register with the value already loaded.
4139 *
4140 * The caller will not modify the returned register, it must be considered
4141 * read-only. Free using iemNativeRegFreeTmpImm.
4142 *
4143 * @returns The host register number; throws VBox status code on failure, so no
4144 * need to check the return value.
4145 * @param pReNative The native recompile state.
4146 * @param poff Pointer to the variable with the code buffer position.
4147 * @param uImm The immediate value that the register must hold upon
4148 * return.
4149 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4150 * registers (@c true, default) or the other way around
4151 * (@c false).
4152 *
4153 * @note Reusing immediate values has not been implemented yet.
4154 */
4155DECL_HIDDEN_THROW(uint8_t)
4156iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4157{
4158 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4159 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4160 return idxReg;
4161}
4162
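/*
 * Illustrative usage sketch of the pattern described above: load an immediate
 * into a read-only temporary and free it again via iemNativeRegFreeTmpImm as
 * the documentation prescribes (the exact release signature is assumed).
 */
#if 0 /* example only, not compiled */
    uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... use idxRegImm strictly as a read-only source operand ... */
    iemNativeRegFreeTmpImm(pReNative, idxRegImm); /* assumed release signature */
#endif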
4163
4164/**
4165 * Allocates a temporary host general purpose register for keeping a guest
4166 * register value.
4167 *
4168 * Since we may already have a register holding the guest register value,
4169 * code will be emitted to do the loading if that's not the case. Code may also
4170 * be emitted if we have to free up a register to satisfy the request.
4171 *
4172 * @returns The host register number; throws VBox status code on failure, so no
4173 * need to check the return value.
4174 * @param pReNative The native recompile state.
4175 * @param poff Pointer to the variable with the code buffer
4176 * position. This will be updated if we need to move a
4177 * variable from register to stack in order to satisfy
4178 * the request.
4179 * @param enmGstReg The guest register that is to be updated.
4180 * @param enmIntendedUse How the caller will be using the host register.
4181 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4182 * register is okay (default). The ASSUMPTION here is
4183 * that the caller has already flushed all volatile
4184 * registers, so this is only applied if we allocate a
4185 * new register.
4186 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4187 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4188 */
4189DECL_HIDDEN_THROW(uint8_t)
4190iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4191 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4192 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4193{
4194 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4195#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4196 AssertMsg( fSkipLivenessAssert
4197 || pReNative->idxCurCall == 0
4198 || enmGstReg == kIemNativeGstReg_Pc
4199 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4200 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4201 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4202 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4203 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4204 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4205#endif
4206 RT_NOREF(fSkipLivenessAssert);
4207#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4208 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4209#endif
4210 uint32_t const fRegMask = !fNoVolatileRegs
4211 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4212 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4213
4214 /*
4215 * First check if the guest register value is already in a host register.
4216 */
4217 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4218 {
4219 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4220 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4221 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4222 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4223
4224 /* It's not supposed to be allocated... */
4225 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4226 {
4227 /*
4228 * If the register will trash the guest shadow copy, try to find a
4229 * completely unused register we can use instead. If that fails,
4230 * we need to disassociate the host reg from the guest reg.
4231 */
4232 /** @todo would be nice to know if preserving the register is in any way helpful. */
4233 /* If the purpose is calculations, try duplicate the register value as
4234 we'll be clobbering the shadow. */
4235 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4236 && ( ~pReNative->Core.bmHstRegs
4237 & ~pReNative->Core.bmHstRegsWithGstShadow
4238 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4239 {
4240 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4241
4242 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4243
4244 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4245 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4246 g_apszIemNativeHstRegNames[idxRegNew]));
4247 idxReg = idxRegNew;
4248 }
4249 /* If the current register matches the restrictions, go ahead and allocate
4250 it for the caller. */
4251 else if (fRegMask & RT_BIT_32(idxReg))
4252 {
4253 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4254 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4255 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4256 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4257 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4258 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4259 else
4260 {
4261 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4262 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4263 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4264 }
4265 }
4266 /* Otherwise, allocate a register that satisfies the caller and transfer
4267 the shadowing if compatible with the intended use. (This basically
4268 means the call wants a non-volatile register (RSP push/pop scenario).) */
4269 else
4270 {
4271 Assert(fNoVolatileRegs);
4272 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4273 !fNoVolatileRegs
4274 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4275 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4276 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4277 {
4278 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4279 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4280 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4281 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4282 }
4283 else
4284 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4285 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4286 g_apszIemNativeHstRegNames[idxRegNew]));
4287 idxReg = idxRegNew;
4288 }
4289 }
4290 else
4291 {
4292 /*
4293 * Oops. Shadowed guest register already allocated!
4294 *
4295 * Allocate a new register, copy the value and, if updating, the
4296 * guest shadow copy assignment to the new register.
4297 */
4298 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4299 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4300 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4301 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4302
4303 /** @todo share register for readonly access. */
4304 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4305 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4306
4307 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4308 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4309
4310 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4311 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4312 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4313 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4314 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4315 else
4316 {
4317 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4318 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4319 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4320 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4321 }
4322 idxReg = idxRegNew;
4323 }
4324 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4325
4326#ifdef VBOX_STRICT
4327 /* Strict builds: Check that the value is correct. */
4328 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4329#endif
4330
4331 return idxReg;
4332 }
4333
4334 /*
4335 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4336 */
4337 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4338
4339 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4340 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4341
4342 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4343 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4344 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4345 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4346
4347 return idxRegNew;
4348}
4349
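/*
 * Illustrative usage sketch: shadow guest EFLAGS in a host register for an
 * update.  The enum values are the ones used above; how the register is
 * released and the value written back afterwards follows the rest of the
 * recompiler and is not shown here.
 */
#if 0 /* example only, not compiled */
    uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
                                                              kIemNativeGstRegUse_ForUpdate);
    /* ... emit code reading and modifying idxEflReg ... */
#endif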
4350
4351/**
4352 * Allocates a temporary host general purpose register that already holds the
4353 * given guest register value.
4354 *
4355 * The use case for this function is places where the shadowing state cannot be
4356 * modified due to branching and such. This will fail if we don't have a
4357 * current shadow copy handy or if it's incompatible. The only code that will
4358 * be emitted here is value checking code in strict builds.
4359 *
4360 * The intended use can only be readonly!
4361 *
4362 * @returns The host register number, UINT8_MAX if not present.
4363 * @param pReNative The native recompile state.
4364 * @param poff Pointer to the instruction buffer offset.
4365 * Will be updated in strict builds if a register is
4366 * found.
4367 * @param enmGstReg The guest register that is to be updated.
4368 * @note In strict builds, this may throw instruction buffer growth failures.
4369 * Non-strict builds will not throw anything.
4370 * @sa iemNativeRegAllocTmpForGuestReg
4371 */
4372DECL_HIDDEN_THROW(uint8_t)
4373iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4374{
4375 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4376#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4377 AssertMsg( pReNative->idxCurCall == 0
4378 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4379 || enmGstReg == kIemNativeGstReg_Pc,
4380 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4381#endif
4382
4383 /*
4384 * First check if the guest register value is already in a host register.
4385 */
4386 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4387 {
4388 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4389 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4390 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4391 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4392
4393 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4394 {
4395 /*
4396 * We only do readonly use here, so easy compared to the other
4397 * variant of this code.
4398 */
4399 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4400 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4401 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4402 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4403 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4404
4405#ifdef VBOX_STRICT
4406 /* Strict builds: Check that the value is correct. */
4407 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4408#else
4409 RT_NOREF(poff);
4410#endif
4411 return idxReg;
4412 }
4413 }
4414
4415 return UINT8_MAX;
4416}
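
/** Usage sketch (illustration only): the branch-safe pattern for
 *  iemNativeRegAllocTmpForGuestRegIfAlreadyPresent().  Since this variant never
 *  modifies the shadowing state, the caller must cope with a UINT8_MAX return
 *  and pick a fallback suited to its code path; the surrounding emitter code
 *  is hypothetical. */
#if 0
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... read-only use of the existing shadow copy ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    else
    {
        /* No compatible shadow copy handy; take a code path that does not need one. */
    }
#endif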
4417
4418
4419/**
4420 * Allocates argument registers for a function call.
4421 *
4422 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4423 * need to check the return value.
4424 * @param pReNative The native recompile state.
4425 * @param off The current code buffer offset.
4426 * @param cArgs The number of arguments the function call takes.
4427 */
4428DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4429{
4430 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4431 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4432 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4433 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4434
4435 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4436 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4437 else if (cArgs == 0)
 4438        return off;
4439
4440 /*
 4441     * Are we lucky and all the registers are free and not shadowing anything?
4442 */
4443 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4444 for (uint32_t i = 0; i < cArgs; i++)
4445 {
4446 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4447 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4448 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4449 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4450 }
4451 /*
4452 * Okay, not lucky so we have to free up the registers.
4453 */
4454 else
4455 for (uint32_t i = 0; i < cArgs; i++)
4456 {
4457 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4458 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4459 {
4460 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4461 {
4462 case kIemNativeWhat_Var:
4463 {
4464 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4465 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4466 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4467 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4468 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4469
4470 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4471 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4472 else
4473 {
4474 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4475 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4476 }
4477 break;
4478 }
4479
4480 case kIemNativeWhat_Tmp:
4481 case kIemNativeWhat_Arg:
4482 case kIemNativeWhat_rc:
4483 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4484 default:
4485 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4486 }
4487
4488 }
4489 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4490 {
4491 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4492 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4493 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4494 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4495 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4496 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4497 }
4498 else
4499 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4500 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4501 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4502 }
4503 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
 4504    return off;
4505}
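
/** Usage sketch (illustration only): reserving the argument registers before
 *  materializing the arguments of a helper call.  The argument count is an
 *  example value; per the documentation above, the return is the code buffer
 *  offset. */
#if 0
    off = iemNativeRegAllocArgs(pReNative, off, 3 /*cArgs*/);
    /* ... emit code loading the three argument registers, then the call itself ... */
#endif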
4506
4507
4508DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4509
4510
4511#if 0
4512/**
4513 * Frees a register assignment of any type.
4514 *
4515 * @param pReNative The native recompile state.
4516 * @param idxHstReg The register to free.
4517 *
4518 * @note Does not update variables.
4519 */
4520DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4521{
4522 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4523 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4524 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4525 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4526 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4527 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4528 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4529 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4530 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4531 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4532 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4533 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4534 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4535 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4536
4537 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4538 /* no flushing, right:
4539 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4540 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4541 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4542 */
4543}
4544#endif
4545
4546
4547/**
4548 * Frees a temporary register.
4549 *
4550 * Any shadow copies of guest registers assigned to the host register will not
4551 * be flushed by this operation.
4552 */
4553DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4554{
4555 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4556 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4557 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4558 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4559 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4560}
4561
4562
4563/**
4564 * Frees a temporary immediate register.
4565 *
 4566 * It is assumed that the call has not modified the register, so it still holds
4567 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4568 */
4569DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4570{
4571 iemNativeRegFreeTmp(pReNative, idxHstReg);
4572}
4573
4574
4575/**
4576 * Frees a register assigned to a variable.
4577 *
4578 * The register will be disassociated from the variable.
4579 */
4580DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4581{
4582 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4583 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4584 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4585 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4586 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4587
4588 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4589 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4590 if (!fFlushShadows)
4591 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4592 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4593 else
4594 {
4595 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4596 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4597 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4598 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4599 uint64_t fGstRegShadows = fGstRegShadowsOld;
4600 while (fGstRegShadows)
4601 {
4602 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4603 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4604
4605 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4606 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4607 }
4608 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4609 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4610 }
4611}
4612
4613
4614/**
4615 * Called right before emitting a call instruction to move anything important
4616 * out of call-volatile registers, free and flush the call-volatile registers,
4617 * optionally freeing argument variables.
4618 *
4619 * @returns New code buffer offset, UINT32_MAX on failure.
4620 * @param pReNative The native recompile state.
4621 * @param off The code buffer offset.
4622 * @param cArgs The number of arguments the function call takes.
 4623 *                      It is presumed that the host register part of these has
4624 * been allocated as such already and won't need moving,
4625 * just freeing.
4626 * @param fKeepVars Mask of variables that should keep their register
4627 * assignments. Caller must take care to handle these.
4628 */
4629DECL_HIDDEN_THROW(uint32_t)
4630iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4631{
4632 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4633
4634 /* fKeepVars will reduce this mask. */
4635 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4636
4637 /*
4638 * Move anything important out of volatile registers.
4639 */
4640 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4641 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4642 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4643#ifdef IEMNATIVE_REG_FIXED_TMP0
4644 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4645#endif
4646#ifdef IEMNATIVE_REG_FIXED_TMP1
4647 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4648#endif
4649#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4650 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4651#endif
4652 & ~g_afIemNativeCallRegs[cArgs];
4653
4654 fRegsToMove &= pReNative->Core.bmHstRegs;
4655 if (!fRegsToMove)
4656 { /* likely */ }
4657 else
4658 {
4659 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4660 while (fRegsToMove != 0)
4661 {
4662 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4663 fRegsToMove &= ~RT_BIT_32(idxReg);
4664
4665 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4666 {
4667 case kIemNativeWhat_Var:
4668 {
4669 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4670 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4671 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4672 Assert(pVar->idxReg == idxReg);
4673 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4674 {
4675 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4676 idxVar, pVar->enmKind, pVar->idxReg));
4677 if (pVar->enmKind != kIemNativeVarKind_Stack)
4678 pVar->idxReg = UINT8_MAX;
4679 else
4680 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4681 }
4682 else
4683 fRegsToFree &= ~RT_BIT_32(idxReg);
4684 continue;
4685 }
4686
4687 case kIemNativeWhat_Arg:
4688 AssertMsgFailed(("What?!?: %u\n", idxReg));
4689 continue;
4690
4691 case kIemNativeWhat_rc:
4692 case kIemNativeWhat_Tmp:
4693 AssertMsgFailed(("Missing free: %u\n", idxReg));
4694 continue;
4695
4696 case kIemNativeWhat_FixedTmp:
4697 case kIemNativeWhat_pVCpuFixed:
4698 case kIemNativeWhat_pCtxFixed:
4699 case kIemNativeWhat_PcShadow:
4700 case kIemNativeWhat_FixedReserved:
4701 case kIemNativeWhat_Invalid:
4702 case kIemNativeWhat_End:
4703 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4704 }
4705 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4706 }
4707 }
4708
4709 /*
4710 * Do the actual freeing.
4711 */
4712 if (pReNative->Core.bmHstRegs & fRegsToFree)
4713 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4714 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4715 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4716
4717 /* If there are guest register shadows in any call-volatile register, we
 4718       have to clear the corresponding guest register masks for each register. */
4719 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4720 if (fHstRegsWithGstShadow)
4721 {
4722 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4723 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4724 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4725 do
4726 {
4727 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4728 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4729
4730 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4731 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4732 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4733 } while (fHstRegsWithGstShadow != 0);
4734 }
4735
4736 return off;
4737}
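
/** Usage sketch (illustration only): the typical bracket around emitting a
 *  helper call.  The argument count is an example and the call emission in
 *  the middle is left out. */
#if 0
    /* Move/spill anything important out of the call-volatile GPRs and free them. */
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);
    /* ... load the argument registers and emit the actual call here ... */
#endif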
4738
4739
4740/**
4741 * Flushes a set of guest register shadow copies.
4742 *
4743 * This is usually done after calling a threaded function or a C-implementation
4744 * of an instruction.
4745 *
4746 * @param pReNative The native recompile state.
4747 * @param fGstRegs Set of guest registers to flush.
4748 */
4749DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4750{
4751 /*
4752 * Reduce the mask by what's currently shadowed
4753 */
4754 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4755 fGstRegs &= bmGstRegShadowsOld;
4756 if (fGstRegs)
4757 {
4758 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4759 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4760 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4761 if (bmGstRegShadowsNew)
4762 {
4763 /*
4764 * Partial.
4765 */
4766 do
4767 {
4768 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4769 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4770 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4771 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4772 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4773
4774 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4775 fGstRegs &= ~fInThisHstReg;
4776 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4777 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4778 if (!fGstRegShadowsNew)
4779 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4780 } while (fGstRegs != 0);
4781 }
4782 else
4783 {
4784 /*
4785 * Clear all.
4786 */
4787 do
4788 {
4789 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4790 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4791 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4792 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4793 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4794
4795 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4796 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4797 } while (fGstRegs != 0);
4798 pReNative->Core.bmHstRegsWithGstShadow = 0;
4799 }
4800 }
4801}
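
/** Usage sketch (illustration only): dropping a stale shadow after a call that
 *  modified guest state behind our back.  The guest register mask is an
 *  example value. */
#if 0
    /* The C-implementation may have changed the guest PC, so drop its shadow copy. */
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
#endif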
4802
4803
4804/**
4805 * Flushes guest register shadow copies held by a set of host registers.
4806 *
4807 * This is used with the TLB lookup code for ensuring that we don't carry on
4808 * with any guest shadows in volatile registers, as these will get corrupted by
4809 * a TLB miss.
4810 *
4811 * @param pReNative The native recompile state.
4812 * @param fHstRegs Set of host registers to flush guest shadows for.
4813 */
4814DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4815{
4816 /*
4817 * Reduce the mask by what's currently shadowed.
4818 */
4819 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4820 fHstRegs &= bmHstRegsWithGstShadowOld;
4821 if (fHstRegs)
4822 {
4823 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4824 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4825 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4826 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4827 if (bmHstRegsWithGstShadowNew)
4828 {
4829 /*
4830 * Partial (likely).
4831 */
4832 uint64_t fGstShadows = 0;
4833 do
4834 {
4835 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4836 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4837 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4838 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4839
4840 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4841 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4842 fHstRegs &= ~RT_BIT_32(idxHstReg);
4843 } while (fHstRegs != 0);
4844 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4845 }
4846 else
4847 {
4848 /*
4849 * Clear all.
4850 */
4851 do
4852 {
4853 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4854 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4855 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4856 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4857
4858 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4859 fHstRegs &= ~RT_BIT_32(idxHstReg);
4860 } while (fHstRegs != 0);
4861 pReNative->Core.bmGstRegShadows = 0;
4862 }
4863 }
4864}
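
/** Usage sketch (illustration only): dropping guest shadows from the
 *  call-volatile GPRs before emitting a TLB lookup whose miss path may
 *  clobber them. */
#if 0
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif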
4865
4866
4867/**
4868 * Restores guest shadow copies in volatile registers.
4869 *
4870 * This is used after calling a helper function (think TLB miss) to restore the
4871 * register state of volatile registers.
4872 *
4873 * @param pReNative The native recompile state.
4874 * @param off The code buffer offset.
4875 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4876 * be active (allocated) w/o asserting. Hack.
4877 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4878 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4879 */
4880DECL_HIDDEN_THROW(uint32_t)
4881iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4882{
4883 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4884 if (fHstRegs)
4885 {
4886 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4887 do
4888 {
4889 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4890
 4891            /* It's not fatal if a register is active holding a variable that is
 4892               shadowing a guest register, ASSUMING all pending guest register
 4893               writes were flushed prior to the helper call. However, we'll be
 4894               emitting duplicate restores, so it wastes code space. */
4895 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4896 RT_NOREF(fHstRegsActiveShadows);
4897
4898 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4899 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4900 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4901 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4902
4903 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4904 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4905
4906 fHstRegs &= ~RT_BIT_32(idxHstReg);
4907 } while (fHstRegs != 0);
4908 }
4909 return off;
4910}
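
/** Usage sketch (illustration only): restoring clobbered shadow values after a
 *  TLB-miss style helper call.  Variables living in volatile registers are
 *  assumed to have been saved beforehand by
 *  iemNativeVarSaveVolatileRegsPreHlpCall() (see the @see note above). */
#if 0
    /* ... emit the helper call ... */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif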
4911
4912
4913
4914
4915/*********************************************************************************************************************************
4916* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4917*********************************************************************************************************************************/
4918#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4919
4920/**
4921 * Info about shadowed guest SIMD register values.
4922 * @see IEMNATIVEGSTSIMDREG
4923 */
4924static struct
4925{
4926 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4927 uint32_t offXmm;
4928 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4929 uint32_t offYmm;
4930 /** Name (for logging). */
4931 const char *pszName;
4932} const g_aGstSimdShadowInfo[] =
4933{
4934#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4935 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4936 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4937 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4938 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4939 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4940 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4941 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4942 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4943 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4944 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4945 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4946 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4947 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4948 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4949 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4950 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4951 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4952#undef CPUMCTX_OFF_AND_SIZE
4953};
4954AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4955
4956
4957#ifdef LOG_ENABLED
4958/** Host CPU SIMD register names. */
4959DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4960{
4961#ifdef RT_ARCH_AMD64
4962 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4963#elif RT_ARCH_ARM64
4964 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4965 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4966#else
4967# error "port me"
4968#endif
4969};
4970#endif
4971
4972
4973/**
4974 * Frees a temporary SIMD register.
4975 *
4976 * Any shadow copies of guest registers assigned to the host register will not
4977 * be flushed by this operation.
4978 */
4979DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4980{
4981 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4982 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4983 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4984 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4985 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4986}
4987
4988
4989/**
4990 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
4991 *
 4992 * @returns New code buffer offset.
4993 * @param pReNative The native recompile state.
4994 * @param off Current code buffer position.
4995 * @param enmGstSimdReg The guest SIMD register to flush.
4996 */
4997DECL_HIDDEN_THROW(uint32_t)
4998iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4999{
5000 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5001
5002 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5003 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5004 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5005 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5006
5007 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5008 {
5009 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5010 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5011 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5012 }
5013
5014 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5015 {
5016 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5017 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5018 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5019 }
5020
5021 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5022 return off;
5023}
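
/** Usage sketch (illustration only): making sure the guest ymm0 value in
 *  CPUMCTX is up to date before code that reads it from memory.  Only
 *  meaningful for a guest SIMD register that is currently shadowed and
 *  possibly dirty. */
#if 0
    off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
#endif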
5024
5025
5026/**
5027 * Locate a register, possibly freeing one up.
5028 *
5029 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5030 * failed.
5031 *
 5032 * @returns Host register number on success. Returns UINT8_MAX if no registers
 5033 *          were found; the caller is supposed to deal with this and raise an
 5034 *          allocation-type specific status code (if desired).
 5035 *
 5036 * @throws  VBox status code if we run into trouble spilling a variable or
 5037 *          recording debug info. Does NOT throw anything if we're out of
 5038 *          registers, though.
5039 */
5040static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5041 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5042{
5043 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5044 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5045 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5046
5047 /*
5048 * Try a freed register that's shadowing a guest register.
5049 */
5050 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5051 if (fRegs)
5052 {
5053 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5054
5055#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5056 /*
 5057         * When we have liveness information, we use it to kick out all shadowed
 5058         * guest registers that will not be needed any more in this TB. If we're
5059 * lucky, this may prevent us from ending up here again.
5060 *
5061 * Note! We must consider the previous entry here so we don't free
5062 * anything that the current threaded function requires (current
5063 * entry is produced by the next threaded function).
5064 */
5065 uint32_t const idxCurCall = pReNative->idxCurCall;
5066 if (idxCurCall > 0)
5067 {
5068 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5069
5070# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5071 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5072 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5073 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5074#else
5075 /* Construct a mask of the registers not in the read or write state.
 5076           Note! We could skip writes, if they aren't from us, as this is just
5077 a hack to prevent trashing registers that have just been written
5078 or will be written when we retire the current instruction. */
5079 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5080 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5081 & IEMLIVENESSBIT_MASK;
5082#endif
5083 /* If it matches any shadowed registers. */
5084 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5085 {
5086 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5087 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5088 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5089
5090 /* See if we've got any unshadowed registers we can return now. */
5091 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5092 if (fUnshadowedRegs)
5093 {
5094 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5095 return (fPreferVolatile
5096 ? ASMBitFirstSetU32(fUnshadowedRegs)
5097 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5098 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5099 - 1;
5100 }
5101 }
5102 }
5103#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5104
5105 unsigned const idxReg = (fPreferVolatile
5106 ? ASMBitFirstSetU32(fRegs)
5107 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5108 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5109 - 1;
5110
5111 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5112 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5113 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5114 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5115
5116 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5117 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5118 uint32_t idxGstSimdReg = 0;
5119 do
5120 {
5121 if (fGstRegShadows & 0x1)
5122 {
5123 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5124 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5125 }
5126 idxGstSimdReg++;
5127 fGstRegShadows >>= 1;
5128 } while (fGstRegShadows);
5129
5130 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5131 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5132 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5133 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5134 return idxReg;
5135 }
5136
5137 /*
 5138     * Try to free up a variable that's in a register.
 5139     *
 5140     * We do two rounds here: first evacuating variables that don't need to be
 5141     * saved on the stack, then in the second round moving things to the stack.
5142 */
5143 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5144 AssertReleaseFailed(); /** @todo No variable support right now. */
5145#if 0
5146 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5147 {
5148 uint32_t fVars = pReNative->Core.bmSimdVars;
5149 while (fVars)
5150 {
5151 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5152 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5153 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5154 && (RT_BIT_32(idxReg) & fRegMask)
5155 && ( iLoop == 0
5156 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5157 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5158 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5159 {
5160 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5161 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5162 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5163 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5164 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5165 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5166
5167 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5168 {
5169 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5170 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5171 }
5172
5173 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5174 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5175
5176 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5177 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5178 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5179 return idxReg;
5180 }
5181 fVars &= ~RT_BIT_32(idxVar);
5182 }
5183 }
5184#endif
5185
5186 AssertFailed();
5187 return UINT8_MAX;
5188}
5189
5190
5191/**
5192 * Flushes a set of guest register shadow copies.
5193 *
5194 * This is usually done after calling a threaded function or a C-implementation
5195 * of an instruction.
5196 *
5197 * @param pReNative The native recompile state.
5198 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5199 */
5200DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5201{
5202 /*
5203 * Reduce the mask by what's currently shadowed
5204 */
5205 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5206 fGstSimdRegs &= bmGstSimdRegShadows;
5207 if (fGstSimdRegs)
5208 {
5209 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5210 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5211 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5212 if (bmGstSimdRegShadowsNew)
5213 {
5214 /*
5215 * Partial.
5216 */
5217 do
5218 {
5219 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5220 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5221 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5222 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5223 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5224 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5225
5226 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5227 fGstSimdRegs &= ~fInThisHstReg;
5228 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5229 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5230 if (!fGstRegShadowsNew)
5231 {
5232 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5233 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5234 }
5235 } while (fGstSimdRegs != 0);
5236 }
5237 else
5238 {
5239 /*
5240 * Clear all.
5241 */
5242 do
5243 {
5244 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5245 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5246 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5247 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5248 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5249 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5250
5251 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5252 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5253 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5254 } while (fGstSimdRegs != 0);
5255 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5256 }
5257 }
5258}
5259
5260
5261/**
5262 * Allocates a temporary host SIMD register.
5263 *
5264 * This may emit code to save register content onto the stack in order to free
5265 * up a register.
5266 *
5267 * @returns The host register number; throws VBox status code on failure,
5268 * so no need to check the return value.
5269 * @param pReNative The native recompile state.
5270 * @param poff Pointer to the variable with the code buffer position.
 5271 *                      This will be updated if we need to move a variable from
5272 * register to stack in order to satisfy the request.
5273 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5274 * registers (@c true, default) or the other way around
5275 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5276 */
5277DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5278{
5279 /*
5280 * Try find a completely unused register, preferably a call-volatile one.
5281 */
5282 uint8_t idxSimdReg;
5283 uint32_t fRegs = ~pReNative->Core.bmHstRegs
5284 & ~pReNative->Core.bmHstRegsWithGstShadow
5285 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5286 if (fRegs)
5287 {
5288 if (fPreferVolatile)
5289 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5290 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5291 else
5292 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5293 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5294 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5295 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5296 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5297 }
5298 else
5299 {
5300 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5301 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5302 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5303 }
5304
5305 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5306 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5307}
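
/** Usage sketch (illustration only): grabbing a scratch SIMD register for an
 *  intermediate result and handing it back afterwards. */
#if 0
    uint8_t const idxTmpSimd = iemNativeSimdRegAllocTmp(pReNative, &off);
    /* ... emit vector code that uses idxTmpSimd as scratch ... */
    iemNativeSimdRegFreeTmp(pReNative, idxTmpSimd);
#endif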
5308
5309
5310/**
5311 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5312 * registers.
5313 *
5314 * @returns The host register number; throws VBox status code on failure,
5315 * so no need to check the return value.
5316 * @param pReNative The native recompile state.
5317 * @param poff Pointer to the variable with the code buffer position.
 5318 *                      This will be updated if we need to move a variable from
5319 * register to stack in order to satisfy the request.
5320 * @param fRegMask Mask of acceptable registers.
5321 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5322 * registers (@c true, default) or the other way around
5323 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5324 */
5325DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5326 bool fPreferVolatile /*= true*/)
5327{
5328 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5329 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5330
5331 /*
5332 * Try find a completely unused register, preferably a call-volatile one.
5333 */
5334 uint8_t idxSimdReg;
5335 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5336 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5337 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5338 & fRegMask;
5339 if (fRegs)
5340 {
5341 if (fPreferVolatile)
5342 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5343 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5344 else
5345 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5346 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5347 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5348 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5349 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5350 }
5351 else
5352 {
5353 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5354 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5355 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5356 }
5357
5358 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5359 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5360}
5361
5362
5363/**
 5364 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5365 *
5366 * @param pReNative The native recompile state.
5367 * @param idxHstSimdReg The host SIMD register to update the state for.
5368 * @param enmLoadSz The load size to set.
5369 */
5370DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5371 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5372{
5373 /* Everything valid already? -> nothing to do. */
5374 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5375 return;
5376
5377 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5378 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5379 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5380 {
5381 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5382 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5383 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5384 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5385 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5386 }
5387}
5388
5389
5390static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5391 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5392{
 5393    /* Easy case first: either the destination loads the same range as what the source has already loaded, or the source has loaded everything. */
5394 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5395 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5396 {
5397# ifdef RT_ARCH_ARM64
5398 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5399 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5400# endif
5401
5402 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5403 {
5404 switch (enmLoadSzDst)
5405 {
5406 case kIemNativeGstSimdRegLdStSz_256:
5407 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5408 break;
5409 case kIemNativeGstSimdRegLdStSz_Low128:
5410 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5411 break;
5412 case kIemNativeGstSimdRegLdStSz_High128:
5413 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5414 break;
5415 default:
5416 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5417 }
5418
5419 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5420 }
5421 }
5422 else
5423 {
5424 /* Complicated stuff where the source is currently missing something, later. */
5425 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5426 }
5427
5428 return off;
5429}
5430
5431
5432/**
5433 * Allocates a temporary host SIMD register for keeping a guest
5434 * SIMD register value.
5435 *
5436 * Since we may already have a register holding the guest register value,
5437 * code will be emitted to do the loading if that's not the case. Code may also
 5438 * be emitted if we have to free up a register to satisfy the request.
5439 *
5440 * @returns The host register number; throws VBox status code on failure, so no
5441 * need to check the return value.
5442 * @param pReNative The native recompile state.
5443 * @param poff Pointer to the variable with the code buffer
 5444 *                      position. This will be updated if we need to move a
5445 * variable from register to stack in order to satisfy
5446 * the request.
 5447 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5448 * @param enmIntendedUse How the caller will be using the host register.
5449 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5450 * register is okay (default). The ASSUMPTION here is
5451 * that the caller has already flushed all volatile
5452 * registers, so this is only applied if we allocate a
5453 * new register.
5454 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5455 */
5456DECL_HIDDEN_THROW(uint8_t)
5457iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5458 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5459 bool fNoVolatileRegs /*= false*/)
5460{
5461 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5462#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5463 AssertMsg( pReNative->idxCurCall == 0
5464 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5465 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5466 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5467 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5468 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5469 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5470#endif
5471#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5472 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5473#endif
5474 uint32_t const fRegMask = !fNoVolatileRegs
5475 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5476 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5477
5478 /*
5479 * First check if the guest register value is already in a host register.
5480 */
5481 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5482 {
5483 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5484 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5485 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5486 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5487
5488 /* It's not supposed to be allocated... */
5489 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5490 {
5491 /*
5492 * If the register will trash the guest shadow copy, try find a
5493 * completely unused register we can use instead. If that fails,
5494 * we need to disassociate the host reg from the guest reg.
5495 */
5496 /** @todo would be nice to know if preserving the register is in any way helpful. */
 5497            /* If the purpose is calculations, try to duplicate the register value as
5498 we'll be clobbering the shadow. */
5499 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5500 && ( ~pReNative->Core.bmHstSimdRegs
5501 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5502 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5503 {
5504 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5505
5506 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5507
5508 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5509 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5510 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5511 idxSimdReg = idxRegNew;
5512 }
5513 /* If the current register matches the restrictions, go ahead and allocate
5514 it for the caller. */
5515 else if (fRegMask & RT_BIT_32(idxSimdReg))
5516 {
5517 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5518 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5519 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5520 {
5521 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5522 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5523 else
5524 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5525 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5526 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5527 }
5528 else
5529 {
5530 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5531 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5532 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5533 }
5534 }
5535 /* Otherwise, allocate a register that satisfies the caller and transfer
5536 the shadowing if compatible with the intended use. (This basically
5537 means the call wants a non-volatile register (RSP push/pop scenario).) */
5538 else
5539 {
5540 Assert(fNoVolatileRegs);
5541 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5542 !fNoVolatileRegs
5543 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5544 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5545 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5546 {
5547 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
 5548                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5549 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5550 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5551 }
5552 else
5553 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5554 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5555 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5556 idxSimdReg = idxRegNew;
5557 }
5558 }
5559 else
5560 {
5561 /*
5562 * Oops. Shadowed guest register already allocated!
5563 *
5564 * Allocate a new register, copy the value and, if updating, the
5565 * guest shadow copy assignment to the new register.
5566 */
5567 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5568 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5569 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5570 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5571
5572 /** @todo share register for readonly access. */
5573 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5574 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5575
5576 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5577 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5578 else
5579 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5580
5581 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5582 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5583 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5584 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5585 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5586 else
5587 {
5588 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5589 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5590 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5591 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5592 }
5593 idxSimdReg = idxRegNew;
5594 }
5595 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5596
5597#ifdef VBOX_STRICT
5598 /* Strict builds: Check that the value is correct. */
5599 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5600 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5601#endif
5602
5603 return idxSimdReg;
5604 }
5605
5606 /*
 5607 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5608 */
5609 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5610
5611 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5612 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5613 else
5614 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5615
5616 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5617 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5618
 5619    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5620 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5621
5622 return idxRegNew;
5623}
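
/** Usage sketch (illustration only): shadowing guest xmm0 (the low 128 bits of
 *  ymm0) for an update; the surrounding emitter code is hypothetical. */
#if 0
    uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(0),
                                                                       kIemNativeGstSimdRegLdStSz_Low128,
                                                                       kIemNativeGstRegUse_ForUpdate);
    /* ... emit code updating the low 128 bits of idxSimdReg (and marking it dirty) ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
#endif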
5624
5625#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5626
5627
5628
5629/*********************************************************************************************************************************
5630* Code emitters for flushing pending guest register writes and sanity checks *
5631*********************************************************************************************************************************/
5632
5633#ifdef VBOX_STRICT
5634/**
5635 * Does internal register allocator sanity checks.
5636 */
5637DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5638{
5639 /*
5640 * Iterate host registers building a guest shadowing set.
5641 */
5642 uint64_t bmGstRegShadows = 0;
5643 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5644 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5645 while (bmHstRegsWithGstShadow)
5646 {
5647 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5648 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5649 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5650
5651 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5652 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5653 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5654 bmGstRegShadows |= fThisGstRegShadows;
5655 while (fThisGstRegShadows)
5656 {
5657 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5658 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5659 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5660 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5661 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5662 }
5663 }
5664 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5665 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5666 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5667
5668 /*
5669 * Now the other way around, checking the guest to host index array.
5670 */
5671 bmHstRegsWithGstShadow = 0;
5672 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5673 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5674 while (bmGstRegShadows)
5675 {
5676 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5677 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5678 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5679
5680 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5681 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5682 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5683 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5684 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5685 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5686 }
5687 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5688 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5689 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5690}
5691#endif /* VBOX_STRICT */
5692
5693
5694/**
5695 * Flushes any delayed guest register writes.
5696 *
5697 * This must be called prior to calling CImpl functions and any helpers that use
5698 * the guest state (like raising exceptions) and such.
5699 *
5700 * Only delayed RIP updates (IEMNATIVE_WITH_DELAYED_PC_UPDATING) and dirty shadow
5701 * SIMD registers (IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) are handled here at present.
5702 */
5703DECL_HIDDEN_THROW(uint32_t)
5704iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5705{
5706#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5707 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5708 off = iemNativeEmitPcWriteback(pReNative, off);
5709#else
5710 RT_NOREF(pReNative, fGstShwExcept);
5711#endif
5712
5713#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5714 /** @todo r=bird: There must be a quicker way to check if anything needs
5715 * doing and then call simd function to do the flushing */
5716 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
5717 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5718 {
5719 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5720 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5721
5722 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5723 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5724
5725 if ( fFlushShadows
5726 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5727 {
5728 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5729
5730 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5731 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5732 }
5733 }
5734#else
5735 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5736#endif
5737
5738 return off;
5739}
5740
5741
5742#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5743/**
5744 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5745 */
5746DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5747{
5748 Assert(pReNative->Core.offPc);
5749# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5750 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5751 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5752# endif
5753
5754# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5755 /* Allocate a temporary PC register. */
5756 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5757
5758 /* Perform the addition and store the result. */
5759 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5760 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5761
5762 /* Free but don't flush the PC register. */
5763 iemNativeRegFreeTmp(pReNative, idxPcReg);
5764# else
5765 /* Compare the shadow with the context value, they should match. */
5766 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5767 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5768# endif
5769
5770 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5771 pReNative->Core.offPc = 0;
5772 pReNative->Core.cInstrPcUpdateSkipped = 0;
5773
5774 return off;
5775}
5776#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5777
5778
5779/*********************************************************************************************************************************
5780* Code Emitters (larger snippets) *
5781*********************************************************************************************************************************/
5782
5783/**
5784 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5785 * extending to 64-bit width.
5786 *
5787 * @returns New code buffer offset on success, UINT32_MAX on failure.
5788 * @param pReNative The native recompile state.
5789 * @param off The current code buffer position.
5790 * @param idxHstReg The host register to load the guest register value into.
5791 * @param enmGstReg The guest register to load.
5792 *
5793 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5794 * that is something the caller needs to do if applicable.
5795 */
5796DECL_HIDDEN_THROW(uint32_t)
5797iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5798{
5799 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5800 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5801
5802 switch (g_aGstShadowInfo[enmGstReg].cb)
5803 {
5804 case sizeof(uint64_t):
5805 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5806 case sizeof(uint32_t):
5807 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5808 case sizeof(uint16_t):
5809 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5810#if 0 /* not present in the table. */
5811 case sizeof(uint8_t):
5812 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5813#endif
5814 default:
5815 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5816 }
5817}
5818
5819
5820#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5821/**
5822 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5823 *
5824 * @returns New code buffer offset on success, UINT32_MAX on failure.
5825 * @param pReNative The recompiler state.
5826 * @param off The current code buffer position.
5827 * @param idxHstSimdReg The host register to load the guest register value into.
5828 * @param enmGstSimdReg The guest register to load.
5829 * @param enmLoadSz The load size of the register.
5830 *
5831 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5832 * that is something the caller needs to do if applicable.
5833 */
5834DECL_HIDDEN_THROW(uint32_t)
5835iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5836 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5837{
5838 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5839
5840 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5841 switch (enmLoadSz)
5842 {
5843 case kIemNativeGstSimdRegLdStSz_256:
5844 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5845 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5846 case kIemNativeGstSimdRegLdStSz_Low128:
5847 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5848 case kIemNativeGstSimdRegLdStSz_High128:
5849 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5850 default:
5851 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5852 }
5853}
5854#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5855
5856#ifdef VBOX_STRICT
5857
5858/**
5859 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
5860 *
5861 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5862 * Trashes EFLAGS on AMD64.
5863 */
5864DECL_HIDDEN_THROW(uint32_t)
5865iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5866{
5867# ifdef RT_ARCH_AMD64
5868 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5869
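 /* Summary of the sequence emitted below: rotate the upper 32 bits of the register
    into the lower half, test that they are all zero (int3 if not), and then rotate
    them back into place. */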
5870 /* rol reg64, 32 */
5871 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5872 pbCodeBuf[off++] = 0xc1;
5873 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5874 pbCodeBuf[off++] = 32;
5875
5876 /* test reg32, ffffffffh */
5877 if (idxReg >= 8)
5878 pbCodeBuf[off++] = X86_OP_REX_B;
5879 pbCodeBuf[off++] = 0xf7;
5880 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5881 pbCodeBuf[off++] = 0xff;
5882 pbCodeBuf[off++] = 0xff;
5883 pbCodeBuf[off++] = 0xff;
5884 pbCodeBuf[off++] = 0xff;
5885
5886 /* je/jz +1 */
5887 pbCodeBuf[off++] = 0x74;
5888 pbCodeBuf[off++] = 0x01;
5889
5890 /* int3 */
5891 pbCodeBuf[off++] = 0xcc;
5892
5893 /* rol reg64, 32 */
5894 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5895 pbCodeBuf[off++] = 0xc1;
5896 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5897 pbCodeBuf[off++] = 32;
5898
5899# elif defined(RT_ARCH_ARM64)
5900 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5901 /* lsr tmp0, reg64, #32 */
5902 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5903 /* cbz tmp0, +1 */
5904 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5905 /* brk #0x1100 */
5906 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5907
5908# else
5909# error "Port me!"
5910# endif
5911 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5912 return off;
5913}
5914
5915
5916/**
5917 * Emits code that checks that the content of register @a idxReg is the same
5918 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5919 * instruction if that's not the case.
5920 *
5921 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5922 * Trashes EFLAGS on AMD64.
5923 */
5924DECL_HIDDEN_THROW(uint32_t)
5925iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5926{
5927# ifdef RT_ARCH_AMD64
5928 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5929
5930 /* cmp reg, [mem] */
5931 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5932 {
5933 if (idxReg >= 8)
5934 pbCodeBuf[off++] = X86_OP_REX_R;
5935 pbCodeBuf[off++] = 0x38;
5936 }
5937 else
5938 {
5939 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5940 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5941 else
5942 {
5943 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5944 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5945 else
5946 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5947 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5948 if (idxReg >= 8)
5949 pbCodeBuf[off++] = X86_OP_REX_R;
5950 }
5951 pbCodeBuf[off++] = 0x39;
5952 }
5953 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5954
5955 /* je/jz +1 */
5956 pbCodeBuf[off++] = 0x74;
5957 pbCodeBuf[off++] = 0x01;
5958
5959 /* int3 */
5960 pbCodeBuf[off++] = 0xcc;
5961
5962 /* For values smaller than the register size, we must check that the rest
5963 of the register is all zeros. */
5964 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
5965 {
5966 /* test reg64, imm32 */
5967 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5968 pbCodeBuf[off++] = 0xf7;
5969 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5970 pbCodeBuf[off++] = 0;
5971 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
5972 pbCodeBuf[off++] = 0xff;
5973 pbCodeBuf[off++] = 0xff;
5974
5975 /* je/jz +1 */
5976 pbCodeBuf[off++] = 0x74;
5977 pbCodeBuf[off++] = 0x01;
5978
5979 /* int3 */
5980 pbCodeBuf[off++] = 0xcc;
5981 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5982 }
5983 else
5984 {
5985 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5986 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
5987 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
5988 }
5989
5990# elif defined(RT_ARCH_ARM64)
5991 /* mov TMP0, [gstreg] */
5992 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
5993
5994 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5995 /* sub tmp0, tmp0, idxReg */
5996 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
5997 /* cbz tmp0, +1 */
5998 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5999 /* brk #0x1000+enmGstReg */
6000 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6001 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6002
6003# else
6004# error "Port me!"
6005# endif
6006 return off;
6007}
6008
6009
6010# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6011/**
6012 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
6013 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6014 * instruction if that's not the case.
6015 *
6016 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6017 * Trashes EFLAGS on AMD64.
6018 */
6019DECL_HIDDEN_THROW(uint32_t)
6020iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6021 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6022{
6023 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6024 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6025 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6026 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6027 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6028 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6029 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6030 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6031 return off;
6032
6033# ifdef RT_ARCH_AMD64
6034 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
6035
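 /* Sketch of the sequence emitted below: copy the host register into vectmp0,
    pcmpeqq it against the XMM value in CPUMCTX, then pextrq each 64-bit lane into
    tmp0 and int3 if a lane isn't all ones (i.e. the values differ). */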
6036 /* movdqa vectmp0, idxSimdReg */
6037 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6038
6039 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6040
6041 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6042 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6043 if (IEMNATIVE_SIMD_REG_FIXED_TMP0 >= 8)
6044 pbCodeBuf[off++] = X86_OP_REX_R;
6045 pbCodeBuf[off++] = 0x0f;
6046 pbCodeBuf[off++] = 0x38;
6047 pbCodeBuf[off++] = 0x29;
6048 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6049
6050 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6051 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6052 pbCodeBuf[off++] = X86_OP_REX_W
6053 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6054 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6055 pbCodeBuf[off++] = 0x0f;
6056 pbCodeBuf[off++] = 0x3a;
6057 pbCodeBuf[off++] = 0x16;
6058 pbCodeBuf[off++] = 0xeb;
6059 pbCodeBuf[off++] = 0x00;
6060
6061 /* cmp tmp0, 0xffffffffffffffff. */
6062 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6063 pbCodeBuf[off++] = 0x83;
6064 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6065 pbCodeBuf[off++] = 0xff;
6066
6067 /* je/jz +1 */
6068 pbCodeBuf[off++] = 0x74;
6069 pbCodeBuf[off++] = 0x01;
6070
6071 /* int3 */
6072 pbCodeBuf[off++] = 0xcc;
6073
6074 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6075 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6076 pbCodeBuf[off++] = X86_OP_REX_W
6077 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6078 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6079 pbCodeBuf[off++] = 0x0f;
6080 pbCodeBuf[off++] = 0x3a;
6081 pbCodeBuf[off++] = 0x16;
6082 pbCodeBuf[off++] = 0xeb;
6083 pbCodeBuf[off++] = 0x01;
6084
6085 /* cmp tmp0, 0xffffffffffffffff. */
6086 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6087 pbCodeBuf[off++] = 0x83;
6088 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6089 pbCodeBuf[off++] = 0xff;
6090
6091 /* je/jz +1 */
6092 pbCodeBuf[off++] = 0x74;
6093 pbCodeBuf[off++] = 0x01;
6094
6095 /* int3 */
6096 pbCodeBuf[off++] = 0xcc;
6097
6098# elif defined(RT_ARCH_ARM64)
6099 /* mov vectmp0, [gstreg] */
6100 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6101
6102 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6103 {
6104 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6105 /* eor vectmp0, vectmp0, idxSimdReg */
6106 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6107 /* cnt vectmp0, vectmp0 */
6108 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6109 /* umov tmp0, vectmp0.D[0] */
6110 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6111 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6112 /* cbz tmp0, +1 */
6113 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6114 /* brk #0x1000+enmGstReg */
6115 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6116 }
6117
6118 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6119 {
6120 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6121 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6122 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6123 /* cnt vectmp0 + 1, vectmp0 + 1 */
6124 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6125 /* umov tmp0, (vectmp0 + 1).D[0] */
6126 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6127 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6128 /* cbz tmp0, +1 */
6129 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6130 /* brk #0x1000+enmGstReg */
6131 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6132 }
6133
6134# else
6135# error "Port me!"
6136# endif
6137
6138 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6139 return off;
6140}
6141# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6142
6143
6144/**
6145 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
6146 * important bits.
6147 *
6148 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6149 * Trashes EFLAGS on AMD64.
6150 */
6151DECL_HIDDEN_THROW(uint32_t)
6152iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6153{
6154 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6155 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6156 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6157 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6158
6159#ifdef RT_ARCH_AMD64
6160 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6161
6162 /* je/jz +1 */
6163 pbCodeBuf[off++] = 0x74;
6164 pbCodeBuf[off++] = 0x01;
6165
6166 /* int3 */
6167 pbCodeBuf[off++] = 0xcc;
6168
6169# elif defined(RT_ARCH_ARM64)
6170 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6171
6172 /* b.eq +1 */
6173 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6174 /* brk #0x2000 */
6175 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6176
6177# else
6178# error "Port me!"
6179# endif
6180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6181
6182 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6183 return off;
6184}
6185
6186#endif /* VBOX_STRICT */
6187
6188
6189#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6190/**
6191 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6192 */
6193DECL_HIDDEN_THROW(uint32_t)
6194iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6195{
6196 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6197
6198 fEflNeeded &= X86_EFL_STATUS_BITS;
6199 if (fEflNeeded)
6200 {
6201# ifdef RT_ARCH_AMD64
6202 /* test dword [pVCpu + offVCpu], imm32 */
6203 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6204 if (fEflNeeded <= 0xff)
6205 {
6206 pCodeBuf[off++] = 0xf6;
6207 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6208 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6209 }
6210 else
6211 {
6212 pCodeBuf[off++] = 0xf7;
6213 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6214 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6215 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6216 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6217 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6218 }
6219 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6220
6221# else
6222 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6223 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6224 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6225# ifdef RT_ARCH_ARM64
6226 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6227 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6228# else
6229# error "Port me!"
6230# endif
6231 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6232# endif
6233 }
6234 return off;
6235}
6236#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6237
6238
6239/**
6240 * Emits a code for checking the return code of a call and rcPassUp, returning
6241 * from the code if either are non-zero.
6242 */
6243DECL_HIDDEN_THROW(uint32_t)
6244iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6245{
6246#ifdef RT_ARCH_AMD64
6247 /*
6248 * AMD64: eax = call status code.
6249 */
6250
6251 /* edx = rcPassUp */
6252 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6253# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6254 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6255# endif
6256
6257 /* edx = eax | rcPassUp */
6258 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6259 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6260 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6261 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6262
6263 /* Jump to non-zero status return path. */
6264 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6265
6266 /* done. */
6267
6268#elif RT_ARCH_ARM64
6269 /*
6270 * ARM64: w0 = call status code.
6271 */
6272# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6273 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6274# endif
6275 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6276
6277 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6278
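 /* orr w4, w3, w0 ; Combine rcPassUp (w3) and the call status code (w0), then branch
    (cbnz, fixup added below) to the NonZeroRetOrPassUp label if the result is non-zero. */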
6279 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6280
6281 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6282 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6283 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6284
6285#else
6286# error "port me"
6287#endif
6288 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6289 RT_NOREF_PV(idxInstr);
6290 return off;
6291}
6292
6293
6294/**
6295 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6296 * raising a \#GP(0) if it isn't.
6297 *
6298 * @returns New code buffer offset, UINT32_MAX on failure.
6299 * @param pReNative The native recompile state.
6300 * @param off The code buffer offset.
6301 * @param idxAddrReg The host register with the address to check.
6302 * @param idxInstr The current instruction.
6303 */
6304DECL_HIDDEN_THROW(uint32_t)
6305iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6306{
6307 /*
6308 * Make sure we don't have any outstanding guest register writes as we may
6309 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6310 */
6311 off = iemNativeRegFlushPendingWrites(pReNative, off);
6312
6313#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6314 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6315#else
6316 RT_NOREF(idxInstr);
6317#endif
6318
6319#ifdef RT_ARCH_AMD64
6320 /*
6321 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6322 * return raisexcpt();
6323 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6324 */
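 /* Roughly the sequence produced by the emitter calls below (a sketch only; the
    exact encodings come from the iemNativeEmit* helpers):
         mov     tmp, addrReg
         shr     tmp, 32
         add     tmp32, 0x8000
         shr     tmp, 16
         jnz     .LRaiseGp0
  */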
6325 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6326
6327 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6328 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6329 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6330 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6331 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6332
6333 iemNativeRegFreeTmp(pReNative, iTmpReg);
6334
6335#elif defined(RT_ARCH_ARM64)
6336 /*
6337 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6338 * return raisexcpt();
6339 * ----
6340 * mov x1, 0x800000000000
6341 * add x1, x0, x1
6342 * cmp xzr, x1, lsr 48
6343 * b.ne .Lraisexcpt
6344 */
6345 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6346
6347 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6348 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6349 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6350 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6351
6352 iemNativeRegFreeTmp(pReNative, iTmpReg);
6353
6354#else
6355# error "Port me"
6356#endif
6357 return off;
6358}
6359
6360
6361/**
6362 * Emits code to check that the content of @a idxAddrReg is within the limit
6363 * of CS, raising a \#GP(0) if it isn't.
6364 *
6365 * @returns New code buffer offset; throws VBox status code on error.
6366 * @param pReNative The native recompile state.
6367 * @param off The code buffer offset.
6368 * @param idxAddrReg The host register (32-bit) with the address to
6369 * check.
6370 * @param idxInstr The current instruction.
6371 */
6372DECL_HIDDEN_THROW(uint32_t)
6373iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6374 uint8_t idxAddrReg, uint8_t idxInstr)
6375{
6376 /*
6377 * Make sure we don't have any outstanding guest register writes as we may
6378 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6379 */
6380 off = iemNativeRegFlushPendingWrites(pReNative, off);
6381
6382#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6383 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6384#else
6385 RT_NOREF(idxInstr);
6386#endif
6387
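 /* Allocate a read-only copy of the CS segment limit, compare the 32-bit address
    against it (unsigned) and jump to the RaiseGp0 label if the address is above the limit. */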
6388 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6389 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6390 kIemNativeGstRegUse_ReadOnly);
6391
6392 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6393 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6394
6395 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6396 return off;
6397}
6398
6399
6400/**
6401 * Emits a call to a CImpl function or something similar.
6402 */
6403DECL_HIDDEN_THROW(uint32_t)
6404iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6405 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6406{
6407 /* Writeback everything. */
6408 off = iemNativeRegFlushPendingWrites(pReNative, off);
6409
6410 /*
6411 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6412 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6413 */
6414 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6415 fGstShwFlush
6416 | RT_BIT_64(kIemNativeGstReg_Pc)
6417 | RT_BIT_64(kIemNativeGstReg_EFlags));
6418 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6419
6420 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6421
6422 /*
6423 * Load the parameters.
6424 */
6425#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6426 /* Special code for the hidden VBOXSTRICTRC return pointer. */
6427 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6428 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6429 if (cAddParams > 0)
6430 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6431 if (cAddParams > 1)
6432 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6433 if (cAddParams > 2)
6434 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6435 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6436
6437#else
6438 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6439 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6440 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6441 if (cAddParams > 0)
6442 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6443 if (cAddParams > 1)
6444 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6445 if (cAddParams > 2)
6446# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6447 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6448# else
6449 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6450# endif
6451#endif
6452
6453 /*
6454 * Make the call.
6455 */
6456 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6457
6458#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6459 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6460#endif
6461
6462 /*
6463 * Check the status code.
6464 */
6465 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6466}
6467
6468
6469/**
6470 * Emits a call to a threaded worker function.
6471 */
6472DECL_HIDDEN_THROW(uint32_t)
6473iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6474{
6475 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6476
6477 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6478 off = iemNativeRegFlushPendingWrites(pReNative, off);
6479
6480 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6481 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6482
6483#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6484 /* The threaded function may throw / long jmp, so set current instruction
6485 number if we're counting. */
6486 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6487#endif
6488
6489 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6490
6491#ifdef RT_ARCH_AMD64
6492 /* Load the parameters and emit the call. */
6493# ifdef RT_OS_WINDOWS
6494# ifndef VBOXSTRICTRC_STRICT_ENABLED
6495 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6496 if (cParams > 0)
6497 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6498 if (cParams > 1)
6499 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6500 if (cParams > 2)
6501 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6502# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6503 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6504 if (cParams > 0)
6505 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6506 if (cParams > 1)
6507 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6508 if (cParams > 2)
6509 {
6510 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6511 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6512 }
6513 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6514# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6515# else
6516 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6517 if (cParams > 0)
6518 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6519 if (cParams > 1)
6520 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6521 if (cParams > 2)
6522 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6523# endif
6524
6525 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6526
6527# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6528 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6529# endif
6530
6531#elif RT_ARCH_ARM64
6532 /*
6533 * ARM64: Load pVCpu and the call parameters into the argument registers, then make the call.
6534 */
6535 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6536 if (cParams > 0)
6537 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6538 if (cParams > 1)
6539 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6540 if (cParams > 2)
6541 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6542
6543 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6544
6545#else
6546# error "port me"
6547#endif
6548
6549 /*
6550 * Check the status code.
6551 */
6552 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6553
6554 return off;
6555}
6556
6557#ifdef VBOX_WITH_STATISTICS
6558/**
6559 * Emits code to update the thread call statistics.
6560 */
6561DECL_INLINE_THROW(uint32_t)
6562iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6563{
6564 /*
6565 * Update threaded function stats.
6566 */
6567 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6568 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6569# if defined(RT_ARCH_ARM64)
6570 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6571 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6572 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6573 iemNativeRegFreeTmp(pReNative, idxTmp1);
6574 iemNativeRegFreeTmp(pReNative, idxTmp2);
6575# else
6576 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6577# endif
6578 return off;
6579}
6580#endif /* VBOX_WITH_STATISTICS */
6581
6582
6583/**
6584 * Emits the code at the ReturnWithFlags label (returns
6585 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6586 */
6587static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6588{
6589 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6590 if (idxLabel != UINT32_MAX)
6591 {
6592 iemNativeLabelDefine(pReNative, idxLabel, off);
6593
6594 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6595
6596 /* jump back to the return sequence. */
6597 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6598 }
6599 return off;
6600}
6601
6602
6603/**
6604 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6605 */
6606static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6607{
6608 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6609 if (idxLabel != UINT32_MAX)
6610 {
6611 iemNativeLabelDefine(pReNative, idxLabel, off);
6612
6613 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6614
6615 /* jump back to the return sequence. */
6616 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6617 }
6618 return off;
6619}
6620
6621
6622/**
6623 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6624 */
6625static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6626{
6627 /*
6628 * Generate the rc + rcPassUp fiddling code if needed.
6629 */
6630 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6631 if (idxLabel != UINT32_MAX)
6632 {
6633 iemNativeLabelDefine(pReNative, idxLabel, off);
6634
6635 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
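 /* Move the call status code (eax/w0) into the second argument register and pVCpu
    into the first before calling the status-code fiddling helper. */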
6636#ifdef RT_ARCH_AMD64
6637# ifdef RT_OS_WINDOWS
6638# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6639 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6640# endif
6641 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6642 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6643# else
6644 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6645 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6646# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6647 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6648# endif
6649# endif
6650# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6651 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6652# endif
6653
6654#else
6655 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6656 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6657 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6658#endif
6659
6660 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6661 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6662 }
6663 return off;
6664}
6665
6666
6667/**
6668 * Emits a standard epilog.
6669 */
6670static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6671{
6672 *pidxReturnLabel = UINT32_MAX;
6673
6674 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6675 off = iemNativeRegFlushPendingWrites(pReNative, off);
6676
6677 /*
6678 * Successful return, so clear the return register (eax, w0).
6679 */
6680 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6681
6682 /*
6683 * Define label for common return point.
6684 */
6685 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6686 *pidxReturnLabel = idxReturn;
6687
6688 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6689
6690 /*
6691 * Restore registers and return.
6692 */
6693#ifdef RT_ARCH_AMD64
6694 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6695
6696 /* Reposition rsp at the r15 restore point. */
6697 pbCodeBuf[off++] = X86_OP_REX_W;
6698 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6699 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6700 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6701
6702 /* Pop non-volatile registers and return */
6703 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6704 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6705 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6706 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6707 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6708 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6709 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6710 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6711# ifdef RT_OS_WINDOWS
6712 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6713 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6714# endif
6715 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6716 pbCodeBuf[off++] = 0xc9; /* leave */
6717 pbCodeBuf[off++] = 0xc3; /* ret */
6718 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6719
6720#elif RT_ARCH_ARM64
6721 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6722
6723 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
6724 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6725 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6726 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6727 IEMNATIVE_FRAME_VAR_SIZE / 8);
6728 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6729 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6730 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6731 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6732 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6733 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6734 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6735 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6736 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6737 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6738 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6739 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6740
6741 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6742 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6743 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6744 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6745
6746 /* retab / ret */
6747# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6748 if (1)
6749 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6750 else
6751# endif
6752 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6753
6754#else
6755# error "port me"
6756#endif
6757 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6758
6759 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
6760}
6761
6762
6763/**
6764 * Emits a standard prolog.
6765 */
6766static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6767{
6768#ifdef RT_ARCH_AMD64
6769 /*
6770 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6771 * reserving 64 bytes for stack variables plus 4 non-register argument
6772 * slots. Fixed register assignment: xBX = pVCpu.
6773 *
6774 * Since we always do the same register spilling, we can use the same
6775 * unwind description for all the code.
6776 */
6777 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6778 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6779 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6780 pbCodeBuf[off++] = 0x8b;
6781 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6782 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6783 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6784# ifdef RT_OS_WINDOWS
6785 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6786 pbCodeBuf[off++] = 0x8b;
6787 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6788 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6789 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6790# else
6791 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6792 pbCodeBuf[off++] = 0x8b;
6793 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6794# endif
6795 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6796 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6797 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6798 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6799 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6800 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6801 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6802 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6803
6804# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6805 /* Save the frame pointer. */
6806 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6807# endif
6808
6809 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6810 X86_GREG_xSP,
6811 IEMNATIVE_FRAME_ALIGN_SIZE
6812 + IEMNATIVE_FRAME_VAR_SIZE
6813 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6814 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6815 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6816 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6817 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6818
6819#elif RT_ARCH_ARM64
6820 /*
6821 * We set up a stack frame exactly like on x86, only we have to push the
6822 * return address ourselves here. We save all non-volatile registers.
6823 */
6824 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6825
6826# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further; we have been
6827 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff in libunwind comes from. It's
6828 * definitely the dwarf stepping code, but it's very tedious to figure out whether it's in any way
6829 * conditional, so just emit this instruction now and hope for the best... */
6830 /* pacibsp */
6831 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6832# endif
6833
6834 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6835 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6836 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6837 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6838 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6839 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6840 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6841 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6842 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6843 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6844 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6845 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6846 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6847 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6848 /* Save the BP and LR (ret address) registers at the top of the frame. */
6849 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6850 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6851 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6852 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6853 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6854 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6855
6856 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
6857 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
6858
6859 /* mov x28, x0 */
6860 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
6861 /* mov x27, x1 */
6862 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
6863
6864# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6865 /* Save the frame pointer. */
6866 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
6867 ARMV8_A64_REG_X2);
6868# endif
6869
6870#else
6871# error "port me"
6872#endif
6873 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6874 return off;
6875}
6876
6877
6878/*********************************************************************************************************************************
6879* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
6880*********************************************************************************************************************************/
6881
6882/**
6883 * Internal work that allocates a variable with kind set to
6884 * kIemNativeVarKind_Invalid and no current stack allocation.
6885 *
6886 * The kind will either be set by the caller or later when the variable is first
6887 * assigned a value.
6888 *
6889 * @returns Unpacked index.
6890 * @internal
6891 */
6892static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
6893{
6894 Assert(cbType > 0 && cbType <= 64);
6895 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
6896 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
6897 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
6898 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
6899 pReNative->Core.aVars[idxVar].cbVar = cbType;
6900 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
6901 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6902 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
6903 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
6904 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
6905 pReNative->Core.aVars[idxVar].fRegAcquired = false;
6906 pReNative->Core.aVars[idxVar].u.uValue = 0;
6907 return idxVar;
6908}
6909
6910
6911/**
6912 * Internal work that allocates an argument variable w/o setting enmKind.
6913 *
6914 * @returns Unpacked index.
6915 * @internal
6916 */
6917static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
6918{
6919 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
6920 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
6921 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
6922
6923 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
6924 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
6925 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
6926 return idxVar;
6927}
6928
6929
6930/**
6931 * Gets the stack slot for a stack variable, allocating one if necessary.
6932 *
6933 * Calling this function implies that the stack slot will contain a valid
6934 * variable value. The caller deals with any register currently assigned to the
6935 * variable, typically by spilling it into the stack slot.
6936 *
6937 * @returns The stack slot number.
6938 * @param pReNative The recompiler state.
6939 * @param idxVar The variable.
6940 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
6941 */
6942DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
6943{
6944 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
6945 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
6946 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
6947
6948 /* Already got a slot? */
6949 uint8_t const idxStackSlot = pVar->idxStackSlot;
6950 if (idxStackSlot != UINT8_MAX)
6951 {
6952 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
6953 return idxStackSlot;
6954 }
6955
6956 /*
6957 * A single slot is easy to allocate.
6958 * Allocate them from the top end, closest to BP, to reduce the displacement.
6959 */
6960 if (pVar->cbVar <= sizeof(uint64_t))
6961 {
6962 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
6963 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6964 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
6965 pVar->idxStackSlot = (uint8_t)iSlot;
6966 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
6967 return (uint8_t)iSlot;
6968 }
6969
6970 /*
6971 * We need more than one stack slot.
6972 *
6973 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
6974 */
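 /* For example, a 32 byte variable needs fBitAllocMask = 0xf (four 8-byte slots)
    and fBitAlignMask = 3, i.e. the allocation must start at a 4-slot boundary. */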
6975 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
6976 Assert(pVar->cbVar <= 64);
6977 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
6978 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
6979 uint32_t bmStack = ~pReNative->Core.bmStack;
6980 while (bmStack != UINT32_MAX)
6981 {
6982/** @todo allocate from the top to reduce BP displacement. */
6983 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
6984 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6985 if (!(iSlot & fBitAlignMask))
6986 {
6987 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
6988 {
6989 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
6990 pVar->idxStackSlot = (uint8_t)iSlot;
6991 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
6992 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
6993 return (uint8_t)iSlot;
6994 }
6995 }
6996 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
6997 }
6998 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
6999}
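/*
 * A minimal worked example of the multi-slot mask arithmetic above; a sketch
 * added for illustration only, assuming a hypothetical 32 byte variable, and
 * not meant to be compiled as part of the recompiler.
 */
#if 0 /* illustration only */
    uint8_t  const cbVarExample  = 32;                                                  /* assumed example size */
    uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(cbVarExample) - 4) - 1;   /* RT_BIT_32(6 - 4) - 1 = 3 */
    uint32_t const fBitAllocMask = RT_BIT_32((cbVarExample + 7) >> 3) - 1;              /* RT_BIT_32(4) - 1 = 0xf */
    /* So a 32 byte variable occupies four consecutive 8 byte slots starting at a
       slot index that is a multiple of 4, matching the 16->1, 32->3, 64->7 table
       in the comment above. */
#endif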
7000
7001
7002/**
7003 * Changes the variable to a stack variable.
7004 *
7005 * Currently this is only possible to do the first time the variable is used;
7006 * switching later could be implemented but is not done.
7007 *
7008 * @param pReNative The recompiler state.
7009 * @param idxVar The variable.
7010 * @throws VERR_IEM_VAR_IPE_2
7011 */
7012DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7013{
7014 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7015 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7016 if (pVar->enmKind != kIemNativeVarKind_Stack)
7017 {
7018 /* We could in theory transition from immediate to stack as well, but it
7019 would involve the caller doing work storing the value on the stack. So,
7020 till that's required we only allow transition from invalid. */
7021 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7022 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7023 pVar->enmKind = kIemNativeVarKind_Stack;
7024
7025 /* Note! We don't allocate a stack slot here, that's only done when a
7026 slot is actually needed to hold a variable value. */
7027 }
7028}
7029
7030
7031/**
7032 * Sets the variable to a constant (immediate) value.
7033 *
7034 * This does not require stack storage as we know the value and can always
7035 * reload it, unless of course it's referenced.
7036 *
7037 * @param pReNative The recompiler state.
7038 * @param idxVar The variable.
7039 * @param uValue The immediate value.
7040 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7041 */
7042DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7043{
7044 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7045 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7046 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7047 {
7048 /* Only simple transitions for now. */
7049 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7050 pVar->enmKind = kIemNativeVarKind_Immediate;
7051 }
7052 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7053
7054 pVar->u.uValue = uValue;
7055 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7056 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7057 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7058}
7059
7060
7061/**
7062 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7063 *
7064 * This does not require stack storage as we know the value and can always
7065 * reload it. Loading is postponed till needed.
7066 *
7067 * @param pReNative The recompiler state.
7068 * @param idxVar The variable. Unpacked.
7069 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7070 *
7071 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7072 * @internal
7073 */
7074static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7075{
7076 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7077 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7078
7079 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7080 {
7081 /* Only simple transitions for now. */
7082 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7083 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7084 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7085 }
7086 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7087
7088 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7089
7090 /* Update the other variable, ensure it's a stack variable. */
7091 /** @todo handle variables with const values... that'll go boom now. */
7092 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7093 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7094}
7095
7096
7097/**
7098 * Sets the variable to a reference (pointer) to a guest register reference.
7099 *
7100 * This does not require stack storage as we know the value and can always
7101 * reload it. Loading is postponed till needed.
7102 *
7103 * @param pReNative The recompiler state.
7104 * @param idxVar The variable.
7105 * @param   enmRegClass     The class of guest registers to reference.
7106 * @param idxReg The register within @a enmRegClass to reference.
7107 *
7108 * @throws VERR_IEM_VAR_IPE_2
7109 */
7110DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7111 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7112{
7113 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7114 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7115
7116 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7117 {
7118 /* Only simple transitions for now. */
7119 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7120 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7121 }
7122 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7123
7124 pVar->u.GstRegRef.enmClass = enmRegClass;
7125 pVar->u.GstRegRef.idx = idxReg;
7126}
7127
7128
7129DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7130{
7131 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7132}
7133
7134
7135DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7136{
7137 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7138
7139 /* Since we're using a generic uint64_t value type, we must truncate it if
7140        the variable is smaller, otherwise we may end up with too large a value
7141        when scaling up an imm8 w/ sign-extension.
7142
7143 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7144        in the bios, bx=1) when running on arm, because clang expects 16-bit
7145 register parameters to have bits 16 and up set to zero. Instead of
7146 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7147 CF value in the result. */
7148 switch (cbType)
7149 {
7150 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7151 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7152 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7153 }
7154 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7155 return idxVar;
7156}
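/*
 * A small sketch of why the truncation above matters; illustration only, the
 * values are hypothetical and only the masking mirrors the cbType switch in
 * iemNativeArgAllocConst.
 */
#if 0 /* illustration only */
    /* An imm8 of 0xff sign-extended to the instruction's 16-bit operand size: */
    uint64_t uValueExample = (uint64_t)(int64_t)(int8_t)0xff;    /* 0xffffffffffffffff */
    /* Without masking, a 16-bit argument would be passed with bits 16 thru 63 set,
       which e.g. clang on arm64 does not expect.  The cbType switch reduces it to
       the width of the variable: */
    uValueExample &= UINT64_C(0xffff);                           /* 0xffff, as "add bx, 0xffff" needs */
#endif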
7157
7158
7159DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7160{
7161 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7162 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7163 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7164 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7165 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7166 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7167
7168 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7169 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7170 return idxArgVar;
7171}
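/*
 * Usage sketch for iemNativeArgAllocLocalRef; illustration only, where
 * 'idxVarMyLocal' is a placeholder for an already allocated local variable.
 */
#if 0 /* illustration only */
    uint8_t const idxArg1 = iemNativeArgAllocLocalRef(pReNative, 1 /*iArgNo*/, idxVarMyLocal);
    /* 'idxVarMyLocal' is forced to be a stack variable, and when the call is
       emitted idxArg1 will be loaded with the address of its stack slot. */
#endif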
7172
7173
7174DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7175{
7176 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7177     /* Don't set the kind to stack now; leave that to the first use, as for instance
7178 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7179 return idxVar;
7180}
7181
7182
7183DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7184{
7185 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7186
7187 /* Since we're using a generic uint64_t value type, we must truncate it if
7188        the variable is smaller, otherwise we may end up with too large a value
7189        when scaling up an imm8 w/ sign-extension. */
7190 switch (cbType)
7191 {
7192 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7193 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7194 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7195 }
7196 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7197 return idxVar;
7198}
7199
7200
7201/**
7202 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7203 * fixed till we call iemNativeVarRegisterRelease.
7204 *
7205 * @returns The host register number.
7206 * @param pReNative The recompiler state.
7207 * @param idxVar The variable.
7208 * @param   poff        Pointer to the instruction buffer offset; updated in
7209 *                      case a register needs to be freed up or the value
7210 *                      needs to be loaded off the stack.
7211 * @param fInitialized Set if the variable must already have been initialized.
7212 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7213 * the case.
7214 * @param idxRegPref Preferred register number or UINT8_MAX.
7215 */
7216DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7217 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7218{
7219 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7220 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7221 Assert(pVar->cbVar <= 8);
7222 Assert(!pVar->fRegAcquired);
7223
7224 uint8_t idxReg = pVar->idxReg;
7225 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7226 {
7227 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7228 && pVar->enmKind < kIemNativeVarKind_End);
7229 pVar->fRegAcquired = true;
7230 return idxReg;
7231 }
7232
7233 /*
7234 * If the kind of variable has not yet been set, default to 'stack'.
7235 */
7236 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7237 && pVar->enmKind < kIemNativeVarKind_End);
7238 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7239 iemNativeVarSetKindToStack(pReNative, idxVar);
7240
7241 /*
7242      * We have to allocate a register for the variable, even if it's a stack one,
7243      * as we don't know whether any modifications are made to it before it's
7244      * finalized (todo: analyze and insert hints about that?).
7245      *
7246      * If we can, we try to get the correct register for argument variables. This
7247      * is assuming that most argument variables are fetched as close as possible
7248      * to the actual call, so that there aren't any interfering hidden calls
7249      * (memory accesses, etc) in between.
7250      *
7251      * If we cannot, or it's a local variable, we make sure no argument registers
7252      * that will be used by this MC block are allocated here, and we always
7253      * prefer non-volatile registers to avoid needing to spill stuff for internal
7254      * calls.
7255 */
7256 /** @todo Detect too early argument value fetches and warn about hidden
7257 * calls causing less optimal code to be generated in the python script. */
7258
7259 uint8_t const uArgNo = pVar->uArgNo;
7260 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7261 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7262 {
7263 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7264 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7265 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7266 }
7267 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7268 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7269 {
7270 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7271 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7272 & ~pReNative->Core.bmHstRegsWithGstShadow
7273 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7274 & fNotArgsMask;
7275 if (fRegs)
7276 {
7277 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7278 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7279 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7280 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7281 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7282 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7283 }
7284 else
7285 {
7286 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7287 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7288 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7289 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7290 }
7291 }
7292 else
7293 {
7294 idxReg = idxRegPref;
7295 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7296 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7297 }
7298 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7299 pVar->idxReg = idxReg;
7300
7301 /*
7302 * Load it off the stack if we've got a stack slot.
7303 */
7304 uint8_t const idxStackSlot = pVar->idxStackSlot;
7305 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7306 {
7307 Assert(fInitialized);
7308 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7309 switch (pVar->cbVar)
7310 {
7311 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7312 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7313 case 3: AssertFailed(); RT_FALL_THRU();
7314 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7315 default: AssertFailed(); RT_FALL_THRU();
7316 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7317 }
7318 }
7319 else
7320 {
7321 Assert(idxStackSlot == UINT8_MAX);
7322 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7323 }
7324 pVar->fRegAcquired = true;
7325 return idxReg;
7326}
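/*
 * Usage sketch for the acquire/release pairing described above; illustration
 * only, 'idxVarMyLocal' is a placeholder and the exact signature of the
 * iemNativeVarRegisterRelease() call mentioned in the doc comment is assumed.
 */
#if 0 /* illustration only */
    uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarMyLocal, &off,
                                                       false /*fInitialized*/, UINT8_MAX /*idxRegPref*/);
    /* ... emit code using idxReg, advancing 'off' ... */
    iemNativeVarRegisterRelease(pReNative, idxVarMyLocal);
#endif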
7327
7328
7329/**
7330 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7331 * guest register.
7332 *
7333 * This function makes sure there is a register for it and sets it to be the
7334 * current shadow copy of @a enmGstReg.
7335 *
7336 * @returns The host register number.
7337 * @param pReNative The recompiler state.
7338 * @param idxVar The variable.
7339 * @param enmGstReg The guest register this variable will be written to
7340 * after this call.
7341 * @param poff Pointer to the instruction buffer offset.
7342 * In case a register needs to be freed up or if the
7343 * variable content needs to be loaded off the stack.
7344 *
7345 * @note We DO NOT expect @a idxVar to be an argument variable,
7346 *          because this function is only used in the commit stage of an
7347 *          instruction.
7348 */
7349DECL_HIDDEN_THROW(uint8_t)
7350iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7351{
7352 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7353 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7354 Assert(!pVar->fRegAcquired);
7355 AssertMsgStmt( pVar->cbVar <= 8
7356 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7357 || pVar->enmKind == kIemNativeVarKind_Stack),
7358 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7359 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7360 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7361
7362 /*
7363 * This shouldn't ever be used for arguments, unless it's in a weird else
7364 * branch that doesn't do any calling and even then it's questionable.
7365 *
7366 * However, in case someone writes crazy wrong MC code and does register
7367 * updates before making calls, just use the regular register allocator to
7368 * ensure we get a register suitable for the intended argument number.
7369 */
7370 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7371
7372 /*
7373 * If there is already a register for the variable, we transfer/set the
7374 * guest shadow copy assignment to it.
7375 */
7376 uint8_t idxReg = pVar->idxReg;
7377 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7378 {
7379 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7380 {
7381 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7382 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7383 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7384 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7385 }
7386 else
7387 {
7388 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7389 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7390 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7391 }
7392 /** @todo figure this one out. We need some way of making sure the register isn't
7393 * modified after this point, just in case we start writing crappy MC code. */
7394 pVar->enmGstReg = enmGstReg;
7395 pVar->fRegAcquired = true;
7396 return idxReg;
7397 }
7398 Assert(pVar->uArgNo == UINT8_MAX);
7399
7400 /*
7401     * Because this is supposed to be the commit stage, we just tag along with the
7402     * temporary register allocator and upgrade the register to a variable register.
7403 */
7404 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7405 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7406 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7407 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7408 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7409 pVar->idxReg = idxReg;
7410
7411 /*
7412 * Now we need to load the register value.
7413 */
7414 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7415 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7416 else
7417 {
7418 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7419 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7420 switch (pVar->cbVar)
7421 {
7422 case sizeof(uint64_t):
7423 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7424 break;
7425 case sizeof(uint32_t):
7426 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7427 break;
7428 case sizeof(uint16_t):
7429 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7430 break;
7431 case sizeof(uint8_t):
7432 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7433 break;
7434 default:
7435 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7436 }
7437 }
7438
7439 pVar->fRegAcquired = true;
7440 return idxReg;
7441}
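/*
 * Commit-stage usage sketch; illustration only, where 'idxVarResult' and
 * 'enmGstRegDst' are placeholders and the release call signature is assumed.
 */
#if 0 /* illustration only */
    uint8_t const idxReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarResult, enmGstRegDst, &off);
    /* idxReg now holds the variable value and is marked as the shadow copy of
       enmGstRegDst for the full write; release it when the emitting is done: */
    iemNativeVarRegisterRelease(pReNative, idxVarResult);
#endif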
7442
7443
7444/**
7445 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7446 *
7447 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7448 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7449 * requirement of flushing anything in volatile host registers when making a
7450 * call.
7451 *
7452 * @returns New @a off value.
7453 * @param pReNative The recompiler state.
7454 * @param off The code buffer position.
7455 * @param fHstRegsNotToSave Set of registers not to save & restore.
7456 */
7457DECL_HIDDEN_THROW(uint32_t)
7458iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7459{
7460 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7461 if (fHstRegs)
7462 {
7463 do
7464 {
7465 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7466 fHstRegs &= ~RT_BIT_32(idxHstReg);
7467
7468 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7469 {
7470 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7471 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7472 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7473 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7474 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7475 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7476 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7477 {
7478 case kIemNativeVarKind_Stack:
7479 {
7480 /* Temporarily spill the variable register. */
7481 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7482 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7483 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7484 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7485 continue;
7486 }
7487
7488 case kIemNativeVarKind_Immediate:
7489 case kIemNativeVarKind_VarRef:
7490 case kIemNativeVarKind_GstRegRef:
7491 /* It is weird to have any of these loaded at this point. */
7492 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7493 continue;
7494
7495 case kIemNativeVarKind_End:
7496 case kIemNativeVarKind_Invalid:
7497 break;
7498 }
7499 AssertFailed();
7500 }
7501 else
7502 {
7503 /*
7504 * Allocate a temporary stack slot and spill the register to it.
7505 */
7506 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7507 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7508 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7509 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7510 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7511 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7512 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7513 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7514 }
7515 } while (fHstRegs);
7516 }
7517 return off;
7518}
7519
7520
7521/**
7522 * Emit code to restore volatile registers after a call to a helper.
7523 *
7524 * @returns New @a off value.
7525 * @param pReNative The recompiler state.
7526 * @param off The code buffer position.
7527 * @param fHstRegsNotToSave Set of registers not to save & restore.
7528 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7529 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7530 */
7531DECL_HIDDEN_THROW(uint32_t)
7532iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7533{
7534 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7535 if (fHstRegs)
7536 {
7537 do
7538 {
7539 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7540 fHstRegs &= ~RT_BIT_32(idxHstReg);
7541
7542 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7543 {
7544 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7545 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7546 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7547 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7548 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7549 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7550 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7551 {
7552 case kIemNativeVarKind_Stack:
7553 {
7554 /* Unspill the variable register. */
7555 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7556 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7557 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7558 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7559 continue;
7560 }
7561
7562 case kIemNativeVarKind_Immediate:
7563 case kIemNativeVarKind_VarRef:
7564 case kIemNativeVarKind_GstRegRef:
7565 /* It is weird to have any of these loaded at this point. */
7566 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7567 continue;
7568
7569 case kIemNativeVarKind_End:
7570 case kIemNativeVarKind_Invalid:
7571 break;
7572 }
7573 AssertFailed();
7574 }
7575 else
7576 {
7577 /*
7578 * Restore from temporary stack slot.
7579 */
7580 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7581 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7582 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7583 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7584
7585 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7586 }
7587 } while (fHstRegs);
7588 }
7589 return off;
7590}
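/*
 * The save/call/restore pattern the two helpers above are meant for; a sketch
 * for illustration only, with the actual helper call emission elided.
 */
#if 0 /* illustration only */
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);
    /* ... load the helper arguments and emit the call to the TLB-miss helper ... */
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);
#endif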
7591
7592
7593/**
7594 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
7595 *
7596 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
7597 *
7598 * ASSUMES that @a idxVar is valid and unpacked.
7599 */
7600DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7601{
7602 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
7603 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
7604 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7605 {
7606 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
7607 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
7608 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
7609 Assert(cSlots > 0);
7610 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
7611 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7612 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
7613 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
7614 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7615 }
7616 else
7617 Assert(idxStackSlot == UINT8_MAX);
7618}
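/*
 * Worked example of the slot count / mask calculation above, assuming a
 * hypothetical 16 byte variable; illustration only.
 */
#if 0 /* illustration only */
    uint8_t  const cbVarExample = 16;
    uint8_t  const cSlots       = (cbVarExample + sizeof(uint64_t) - 1) / sizeof(uint64_t);  /* = 2 */
    uint32_t const fAllocMask   = (uint32_t)(RT_BIT_32(cSlots) - 1U);                        /* = 0x3, two slots */
#endif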
7619
7620
7621/**
7622 * Worker that frees a single variable.
7623 *
7624 * ASSUMES that @a idxVar is valid and unpacked.
7625 */
7626DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7627{
7628 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
7629 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
7630 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
7631
7632 /* Free the host register first if any assigned. */
7633 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7634 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7635 {
7636 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7637 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7638 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7639 }
7640
7641 /* Free argument mapping. */
7642 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
7643 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
7644 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
7645
7646 /* Free the stack slots. */
7647 iemNativeVarFreeStackSlots(pReNative, idxVar);
7648
7649 /* Free the actual variable. */
7650 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7651 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
7652}
7653
7654
7655/**
7656 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
7657 */
7658DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
7659{
7660 while (bmVars != 0)
7661 {
7662 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7663 bmVars &= ~RT_BIT_32(idxVar);
7664
7665#if 1 /** @todo optimize by simplifying this later... */
7666 iemNativeVarFreeOneWorker(pReNative, idxVar);
7667#else
7668 /* Only need to free the host register, the rest is done as bulk updates below. */
7669 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
7670 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7671 {
7672 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
7673 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
7674 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
7675 }
7676#endif
7677 }
7678#if 0 /** @todo optimize by simplifying this later... */
7679 pReNative->Core.bmVars = 0;
7680 pReNative->Core.bmStack = 0;
7681 pReNative->Core.u64ArgVars = UINT64_MAX;
7682#endif
7683}
7684
7685
7686
7687/*********************************************************************************************************************************
7688* Emitters for IEM_MC_CALL_CIMPL_XXX *
7689*********************************************************************************************************************************/
7690
7691/**
7692 * Emits code to load a reference to the given guest register into @a idxGprDst.
7693 */
7694DECL_HIDDEN_THROW(uint32_t)
7695iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
7696 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
7697{
7698#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7699     /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
7700#endif
7701
7702 /*
7703 * Get the offset relative to the CPUMCTX structure.
7704 */
7705 uint32_t offCpumCtx;
7706 switch (enmClass)
7707 {
7708 case kIemNativeGstRegRef_Gpr:
7709 Assert(idxRegInClass < 16);
7710 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
7711 break;
7712
7713 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
7714 Assert(idxRegInClass < 4);
7715 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
7716 break;
7717
7718 case kIemNativeGstRegRef_EFlags:
7719 Assert(idxRegInClass == 0);
7720 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
7721 break;
7722
7723 case kIemNativeGstRegRef_MxCsr:
7724 Assert(idxRegInClass == 0);
7725 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
7726 break;
7727
7728 case kIemNativeGstRegRef_FpuReg:
7729 Assert(idxRegInClass < 8);
7730 AssertFailed(); /** @todo what kind of indexing? */
7731 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7732 break;
7733
7734 case kIemNativeGstRegRef_MReg:
7735 Assert(idxRegInClass < 8);
7736 AssertFailed(); /** @todo what kind of indexing? */
7737 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
7738 break;
7739
7740 case kIemNativeGstRegRef_XReg:
7741 Assert(idxRegInClass < 16);
7742 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
7743 break;
7744
7745 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
7746 Assert(idxRegInClass == 0);
7747 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
7748 break;
7749
7750 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
7751 Assert(idxRegInClass == 0);
7752 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
7753 break;
7754
7755 default:
7756 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
7757 }
7758
7759 /*
7760      * Load the address into the destination register.
7761 */
7762#ifdef RT_ARCH_AMD64
7763 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
7764
7765#elif defined(RT_ARCH_ARM64)
7766 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7767 Assert(offCpumCtx < 4096);
7768 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
7769
7770#else
7771# error "Port me!"
7772#endif
7773
7774 return off;
7775}
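/*
 * Usage sketch; illustration only, 'idxRegPtr' is a placeholder for an already
 * allocated host register.  Loads the address of guest GPR 3 (RBX) into it:
 */
#if 0 /* illustration only */
    off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxRegPtr, kIemNativeGstRegRef_Gpr, 3 /*rbx*/);
#endif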
7776
7777
7778/**
7779 * Common code for CIMPL and AIMPL calls.
7780 *
7781 * These are calls that use argument variables and such. They should not be
7782 * confused with internal calls required to implement an MC operation,
7783 * like a TLB load and similar.
7784 *
7785 * Upon return all that is left to do is to load any hidden arguments and
7786 * perform the call. All argument variables are freed.
7787 *
7788 * @returns New code buffer offset; throws VBox status code on error.
7789 * @param pReNative The native recompile state.
7790 * @param off The code buffer offset.
7791 * @param   cArgs       The total number of arguments (includes hidden
7792 * count).
7793 * @param cHiddenArgs The number of hidden arguments. The hidden
7794 * arguments must not have any variable declared for
7795 * them, whereas all the regular arguments must
7796 * (tstIEMCheckMc ensures this).
7797 */
7798DECL_HIDDEN_THROW(uint32_t)
7799iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
7800{
7801#ifdef VBOX_STRICT
7802 /*
7803 * Assert sanity.
7804 */
7805 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
7806 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
7807 for (unsigned i = 0; i < cHiddenArgs; i++)
7808 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
7809 for (unsigned i = cHiddenArgs; i < cArgs; i++)
7810 {
7811 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
7812 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
7813 }
7814 iemNativeRegAssertSanity(pReNative);
7815#endif
7816
7817 /* We don't know what the called function makes use of, so flush any pending register writes. */
7818 off = iemNativeRegFlushPendingWrites(pReNative, off);
7819
7820 /*
7821 * Before we do anything else, go over variables that are referenced and
7822 * make sure they are not in a register.
7823 */
7824 uint32_t bmVars = pReNative->Core.bmVars;
7825 if (bmVars)
7826 {
7827 do
7828 {
7829 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
7830 bmVars &= ~RT_BIT_32(idxVar);
7831
7832 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
7833 {
7834 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
7835 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
7836 {
7837 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7838 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
7839 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
7840 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7841 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
7842
7843 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7844 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
7845 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
7846 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
7847 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
7848 }
7849 }
7850 } while (bmVars != 0);
7851#if 0 //def VBOX_STRICT
7852 iemNativeRegAssertSanity(pReNative);
7853#endif
7854 }
7855
7856 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
7857
7858 /*
7859 * First, go over the host registers that will be used for arguments and make
7860 * sure they either hold the desired argument or are free.
7861 */
7862 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
7863 {
7864 for (uint32_t i = 0; i < cRegArgs; i++)
7865 {
7866 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
7867 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
7868 {
7869 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
7870 {
7871 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
7872 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7873 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7874 Assert(pVar->idxReg == idxArgReg);
7875 uint8_t const uArgNo = pVar->uArgNo;
7876 if (uArgNo == i)
7877 { /* perfect */ }
7878 /* The variable allocator logic should make sure this is impossible,
7879 except for when the return register is used as a parameter (ARM,
7880 but not x86). */
7881#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
7882 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
7883 {
7884# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7885# error "Implement this"
7886# endif
7887 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
7888 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
7889 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
7890 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7891 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
7892 }
7893#endif
7894 else
7895 {
7896 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
7897
7898 if (pVar->enmKind == kIemNativeVarKind_Stack)
7899 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
7900 else
7901 {
7902 /* just free it, can be reloaded if used again */
7903 pVar->idxReg = UINT8_MAX;
7904 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
7905 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
7906 }
7907 }
7908 }
7909 else
7910 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
7911 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
7912 }
7913 }
7914#if 0 //def VBOX_STRICT
7915 iemNativeRegAssertSanity(pReNative);
7916#endif
7917 }
7918
7919 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
7920
7921#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
7922 /*
7923 * If there are any stack arguments, make sure they are in their place as well.
7924 *
7925      * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
7926      * the caller) will be loading it later and it must be free (see the first loop).
7927 */
7928 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
7929 {
7930 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
7931 {
7932 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
7933 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
7934 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7935 {
7936 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
7937 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
7938 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
7939 pVar->idxReg = UINT8_MAX;
7940 }
7941 else
7942 {
7943 /* Use ARG0 as temp for stuff we need registers for. */
7944 switch (pVar->enmKind)
7945 {
7946 case kIemNativeVarKind_Stack:
7947 {
7948 uint8_t const idxStackSlot = pVar->idxStackSlot;
7949 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7950 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
7951 iemNativeStackCalcBpDisp(idxStackSlot));
7952 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7953 continue;
7954 }
7955
7956 case kIemNativeVarKind_Immediate:
7957 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
7958 continue;
7959
7960 case kIemNativeVarKind_VarRef:
7961 {
7962 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
7963 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
7964 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7965 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
7966 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
7967 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
7968 {
7969 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
7970 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
7971 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7972 }
7973 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
7974 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
7975 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
7976 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7977 continue;
7978 }
7979
7980 case kIemNativeVarKind_GstRegRef:
7981 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
7982 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
7983 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
7984 continue;
7985
7986 case kIemNativeVarKind_Invalid:
7987 case kIemNativeVarKind_End:
7988 break;
7989 }
7990 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
7991 }
7992 }
7993# if 0 //def VBOX_STRICT
7994 iemNativeRegAssertSanity(pReNative);
7995# endif
7996 }
7997#else
7998 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
7999#endif
8000
8001 /*
8002 * Make sure the argument variables are loaded into their respective registers.
8003 *
8004 * We can optimize this by ASSUMING that any register allocations are for
8005      * registers that have already been loaded and are ready.  The previous step
8006 * saw to that.
8007 */
8008 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8009 {
8010 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8011 {
8012 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8013 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8014 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8015 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8016 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8017 else
8018 {
8019 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8020 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8021 {
8022 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8023 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8024 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8025 | RT_BIT_32(idxArgReg);
8026 pVar->idxReg = idxArgReg;
8027 }
8028 else
8029 {
8030                     /* The argument register is free, so load the value into it directly. */
8031 switch (pVar->enmKind)
8032 {
8033 case kIemNativeVarKind_Stack:
8034 {
8035 uint8_t const idxStackSlot = pVar->idxStackSlot;
8036 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8037 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8038 continue;
8039 }
8040
8041 case kIemNativeVarKind_Immediate:
8042 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8043 continue;
8044
8045 case kIemNativeVarKind_VarRef:
8046 {
8047 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8048 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8049 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8050 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8051 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8052 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8053 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8054 {
8055 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8056 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8057 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8058 }
8059 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8060 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8061 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8062 continue;
8063 }
8064
8065 case kIemNativeVarKind_GstRegRef:
8066 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8067 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8068 continue;
8069
8070 case kIemNativeVarKind_Invalid:
8071 case kIemNativeVarKind_End:
8072 break;
8073 }
8074 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8075 }
8076 }
8077 }
8078#if 0 //def VBOX_STRICT
8079 iemNativeRegAssertSanity(pReNative);
8080#endif
8081 }
8082#ifdef VBOX_STRICT
8083 else
8084 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8085 {
8086 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8087 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8088 }
8089#endif
8090
8091 /*
8092 * Free all argument variables (simplified).
8093 * Their lifetime always expires with the call they are for.
8094 */
8095 /** @todo Make the python script check that arguments aren't used after
8096 * IEM_MC_CALL_XXXX. */
8097     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8098      *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8099 * an argument value. There is also some FPU stuff. */
8100 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8101 {
8102 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8103 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8104
8105 /* no need to free registers: */
8106 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8107 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8108 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8109 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8110 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8111 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8112
8113 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8114 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8115 iemNativeVarFreeStackSlots(pReNative, idxVar);
8116 }
8117 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8118
8119 /*
8120 * Flush volatile registers as we make the call.
8121 */
8122 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8123
8124 return off;
8125}
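/*
 * A rough sketch of how an AIMPL-style emitter drives the common worker above
 * for a two argument helper without hidden arguments; illustration only, where
 * 'uMyArg0', 'uMyArg1' and the trailing call emitter are placeholders /
 * assumptions.
 */
#if 0 /* illustration only */
    uint8_t const idxArg0 = iemNativeArgAllocConst(pReNative, 0, sizeof(uint64_t), uMyArg0);
    uint8_t const idxArg1 = iemNativeArgAllocConst(pReNative, 1, sizeof(uint16_t), uMyArg1);
    RT_NOREF(idxArg0, idxArg1);

    /* Spill referenced variables, get the argument values into the right
       registers and free the argument variables: */
    off = iemNativeEmitCallCommon(pReNative, off, 2 /*cArgs*/, 0 /*cHiddenArgs*/);

    /* All that is left is loading any hidden arguments (none here) and emitting
       the call itself, e.g. via an iemNativeEmitCallImm style emitter (assumed). */
#endif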
8126
8127
8128
8129/*********************************************************************************************************************************
8130* TLB Lookup. *
8131*********************************************************************************************************************************/
8132
8133/**
8134 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8135 */
8136DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8137{
8138 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8139 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8140 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8141 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8142
8143 /* Do the lookup manually. */
8144 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8145 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8146 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8147 if (RT_LIKELY(pTlbe->uTag == uTag))
8148 {
8149 /*
8150 * Check TLB page table level access flags.
8151 */
8152 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8153 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8154 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8155 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8156 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8157 | IEMTLBE_F_PG_UNASSIGNED
8158 | IEMTLBE_F_PT_NO_ACCESSED
8159 | fNoWriteNoDirty | fNoUser);
8160 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8161 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8162 {
8163 /*
8164              * Compute the address and check it against the expected result.
8165 */
8166 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8167 if ((uintptr_t)pbAddr == uResult)
8168 return;
8169 RT_NOREF(cbMem);
8170 AssertFailed();
8171 }
8172 else
8173 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8174 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8175 }
8176 else
8177 AssertFailed();
8178 RT_BREAKPOINT();
8179}
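/*
 * The packing of uSegAndSizeAndAccess implied by the unpacking at the top of
 * the function above: byte 0 = segment register, byte 1 = access size, bits 16
 * and up = fAccess.  A sketch for illustration only.
 */
#if 0 /* illustration only */
    uint32_t const uSegAndSizeAndAccess = (uint32_t)iSegReg
                                        | ((uint32_t)cbMem << 8)
                                        | (fAccess << 16);
#endif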
8180
8181/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8182
8183
8184
8185/*********************************************************************************************************************************
8186* Recompiler Core. *
8187*********************************************************************************************************************************/
8188
8189/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8190static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8191{
8192 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8193 pDis->cbCachedInstr += cbMaxRead;
8194 RT_NOREF(cbMinRead);
8195 return VERR_NO_DATA;
8196}
8197
8198
8199DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8200{
8201 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8202 {
8203#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8204 ENTRY(fLocalForcedActions),
8205 ENTRY(iem.s.rcPassUp),
8206 ENTRY(iem.s.fExec),
8207 ENTRY(iem.s.pbInstrBuf),
8208 ENTRY(iem.s.uInstrBufPc),
8209 ENTRY(iem.s.GCPhysInstrBuf),
8210 ENTRY(iem.s.cbInstrBufTotal),
8211 ENTRY(iem.s.idxTbCurInstr),
8212#ifdef VBOX_WITH_STATISTICS
8213 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8214 ENTRY(iem.s.StatNativeTlbHitsForStore),
8215 ENTRY(iem.s.StatNativeTlbHitsForStack),
8216 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8217 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8218 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8219 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8220 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8221#endif
8222 ENTRY(iem.s.DataTlb.aEntries),
8223 ENTRY(iem.s.DataTlb.uTlbRevision),
8224 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8225 ENTRY(iem.s.DataTlb.cTlbHits),
8226 ENTRY(iem.s.CodeTlb.aEntries),
8227 ENTRY(iem.s.CodeTlb.uTlbRevision),
8228 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8229 ENTRY(iem.s.CodeTlb.cTlbHits),
8230 ENTRY(pVMR3),
8231 ENTRY(cpum.GstCtx.rax),
8232 ENTRY(cpum.GstCtx.ah),
8233 ENTRY(cpum.GstCtx.rcx),
8234 ENTRY(cpum.GstCtx.ch),
8235 ENTRY(cpum.GstCtx.rdx),
8236 ENTRY(cpum.GstCtx.dh),
8237 ENTRY(cpum.GstCtx.rbx),
8238 ENTRY(cpum.GstCtx.bh),
8239 ENTRY(cpum.GstCtx.rsp),
8240 ENTRY(cpum.GstCtx.rbp),
8241 ENTRY(cpum.GstCtx.rsi),
8242 ENTRY(cpum.GstCtx.rdi),
8243 ENTRY(cpum.GstCtx.r8),
8244 ENTRY(cpum.GstCtx.r9),
8245 ENTRY(cpum.GstCtx.r10),
8246 ENTRY(cpum.GstCtx.r11),
8247 ENTRY(cpum.GstCtx.r12),
8248 ENTRY(cpum.GstCtx.r13),
8249 ENTRY(cpum.GstCtx.r14),
8250 ENTRY(cpum.GstCtx.r15),
8251 ENTRY(cpum.GstCtx.es.Sel),
8252 ENTRY(cpum.GstCtx.es.u64Base),
8253 ENTRY(cpum.GstCtx.es.u32Limit),
8254 ENTRY(cpum.GstCtx.es.Attr),
8255 ENTRY(cpum.GstCtx.cs.Sel),
8256 ENTRY(cpum.GstCtx.cs.u64Base),
8257 ENTRY(cpum.GstCtx.cs.u32Limit),
8258 ENTRY(cpum.GstCtx.cs.Attr),
8259 ENTRY(cpum.GstCtx.ss.Sel),
8260 ENTRY(cpum.GstCtx.ss.u64Base),
8261 ENTRY(cpum.GstCtx.ss.u32Limit),
8262 ENTRY(cpum.GstCtx.ss.Attr),
8263 ENTRY(cpum.GstCtx.ds.Sel),
8264 ENTRY(cpum.GstCtx.ds.u64Base),
8265 ENTRY(cpum.GstCtx.ds.u32Limit),
8266 ENTRY(cpum.GstCtx.ds.Attr),
8267 ENTRY(cpum.GstCtx.fs.Sel),
8268 ENTRY(cpum.GstCtx.fs.u64Base),
8269 ENTRY(cpum.GstCtx.fs.u32Limit),
8270 ENTRY(cpum.GstCtx.fs.Attr),
8271 ENTRY(cpum.GstCtx.gs.Sel),
8272 ENTRY(cpum.GstCtx.gs.u64Base),
8273 ENTRY(cpum.GstCtx.gs.u32Limit),
8274 ENTRY(cpum.GstCtx.gs.Attr),
8275 ENTRY(cpum.GstCtx.rip),
8276 ENTRY(cpum.GstCtx.eflags),
8277 ENTRY(cpum.GstCtx.uRipInhibitInt),
8278 ENTRY(cpum.GstCtx.cr0),
8279 ENTRY(cpum.GstCtx.cr4),
8280 ENTRY(cpum.GstCtx.aXcr[0]),
8281 ENTRY(cpum.GstCtx.aXcr[1]),
8282#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8283 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8284 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8285 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8286 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8287 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8288 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8289 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8290 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8291 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8292 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8293 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8294 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8295 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8296 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8297 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8298 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8299 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8300 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8301 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8302 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8303 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8304 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8305 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8306 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8307 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8308 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8309 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8310 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8311 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8312 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8313 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8314 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8315#endif
8316#undef ENTRY
8317 };
8318#ifdef VBOX_STRICT
8319 static bool s_fOrderChecked = false;
8320 if (!s_fOrderChecked)
8321 {
8322 s_fOrderChecked = true;
8323 uint32_t offPrev = s_aMembers[0].off;
8324 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8325 {
8326 Assert(s_aMembers[i].off > offPrev);
8327 offPrev = s_aMembers[i].off;
8328 }
8329 }
8330#endif
8331
8332 /*
8333 * Binary lookup.
8334 */
8335 unsigned iStart = 0;
8336 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8337 for (;;)
8338 {
8339 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8340 uint32_t const offCur = s_aMembers[iCur].off;
8341 if (off < offCur)
8342 {
8343 if (iCur != iStart)
8344 iEnd = iCur;
8345 else
8346 break;
8347 }
8348 else if (off > offCur)
8349 {
8350 if (iCur + 1 < iEnd)
8351 iStart = iCur + 1;
8352 else
8353 break;
8354 }
8355 else
8356 return s_aMembers[iCur].pszName;
8357 }
8358#ifdef VBOX_WITH_STATISTICS
8359 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8360 return "iem.s.acThreadedFuncStats[iFn]";
8361#endif
8362 return NULL;
8363}
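/*
 * Usage sketch, for illustration only: resolving a VMCPU offset back to a
 * member name when annotating the disassembly.
 */
#if 0 /* illustration only */
    const char *pszName = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
    /* pszName == "cpum.GstCtx.rip" since that offset is in the table above;
       unknown offsets yield NULL. */
#endif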
8364
8365
8366DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8367{
8368 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8369#if defined(RT_ARCH_AMD64)
8370 static const char * const a_apszMarkers[] =
8371 {
8372 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8373 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8374 };
8375#endif
8376
8377 char szDisBuf[512];
8378 DISSTATE Dis;
8379 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8380 uint32_t const cNative = pTb->Native.cInstructions;
8381 uint32_t offNative = 0;
8382#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8383 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8384#endif
8385 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8386 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8387 : DISCPUMODE_64BIT;
8388#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8389 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8390#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8391 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8392#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8393# error "Port me"
8394#else
8395 csh hDisasm = ~(size_t)0;
8396# if defined(RT_ARCH_AMD64)
8397 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8398# elif defined(RT_ARCH_ARM64)
8399 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8400# else
8401# error "Port me"
8402# endif
8403 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8404
8405 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8406 //Assert(rcCs == CS_ERR_OK);
8407#endif
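#if 0 /* Illustrative sketch, not part of the original source and kept out of the
         build: the basic capstone pattern used further down for the host code --
         disassemble a single instruction and free it again.  Only relevant to the
         VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER configuration; abExampleRet is a
         made-up stand-in buffer. */
    static uint8_t const abExampleRet[] = { 0xc3 }; /* x86 'ret' */
    cs_insn *pExampleInstr;
    size_t cExampleInstrs = cs_disasm(hDisasm, abExampleRet, sizeof(abExampleRet),
                                      (uintptr_t)abExampleRet, 1 /*count*/, &pExampleInstr);
    if (cExampleInstrs == 1)
    {
        /* pExampleInstr->mnemonic, ->op_str and ->size describe the instruction. */
        cs_free(pExampleInstr, cExampleInstrs);
    }
#endif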
8408
8409 /*
8410 * Print TB info.
8411 */
8412 pHlp->pfnPrintf(pHlp,
8413 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8414 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8415 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8416 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8417#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8418 if (pDbgInfo && pDbgInfo->cEntries > 1)
8419 {
8420 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8421
8422 /*
8423         * This disassembly is driven by the debug info, which follows the native
8424         * code and indicates where the next guest instruction starts, where the
8425         * labels are placed, and similar details.
8426 */
8427 uint32_t idxThreadedCall = 0;
8428 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8429 uint8_t idxRange = UINT8_MAX;
8430 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8431 uint32_t offRange = 0;
8432 uint32_t offOpcodes = 0;
8433 uint32_t const cbOpcodes = pTb->cbOpcodes;
8434 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
8435 uint32_t const cDbgEntries = pDbgInfo->cEntries;
8436 uint32_t iDbgEntry = 1;
8437 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
8438
8439 while (offNative < cNative)
8440 {
8441 /* If we're at or have passed the point where the next chunk of debug
8442 info starts, process it. */
8443 if (offDbgNativeNext <= offNative)
8444 {
8445 offDbgNativeNext = UINT32_MAX;
8446 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
8447 {
8448 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
8449 {
8450 case kIemTbDbgEntryType_GuestInstruction:
8451 {
8452 /* Did the exec flag change? */
8453 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
8454 {
8455 pHlp->pfnPrintf(pHlp,
8456 " fExec change %#08x -> %#08x %s\n",
8457 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8458 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
8459 szDisBuf, sizeof(szDisBuf)));
8460 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
8461 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8462 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8463 : DISCPUMODE_64BIT;
8464 }
8465
8466                        /* New opcode range? We need to fend off a spurious debug info entry here for cases
8467 where the compilation was aborted before the opcode was recorded and the actual
8468 instruction was translated to a threaded call. This may happen when we run out
8469 of ranges, or when some complicated interrupts/FFs are found to be pending or
8470 similar. So, we just deal with it here rather than in the compiler code as it
8471 is a lot simpler to do here. */
8472 if ( idxRange == UINT8_MAX
8473 || idxRange >= cRanges
8474 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
8475 {
8476 idxRange += 1;
8477 if (idxRange < cRanges)
8478 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
8479 else
8480 continue;
8481 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
8482 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
8483 + (pTb->aRanges[idxRange].idxPhysPage == 0
8484 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8485 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
8486 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8487 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
8488 pTb->aRanges[idxRange].idxPhysPage);
8489 GCPhysPc += offRange;
8490 }
8491
8492 /* Disassemble the instruction. */
8493 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
8494 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
8495 uint32_t cbInstr = 1;
8496 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8497 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
8498 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8499 if (RT_SUCCESS(rc))
8500 {
8501 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8502 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8503 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8504 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8505
8506 static unsigned const s_offMarker = 55;
8507 static char const s_szMarker[] = " ; <--- guest";
8508 if (cch < s_offMarker)
8509 {
8510 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
8511 cch = s_offMarker;
8512 }
8513 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
8514 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
8515
8516 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
8517 }
8518 else
8519 {
8520 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
8521 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
8522 cbInstr = 1;
8523 }
8524 GCPhysPc += cbInstr;
8525 offOpcodes += cbInstr;
8526 offRange += cbInstr;
8527 continue;
8528 }
8529
8530 case kIemTbDbgEntryType_ThreadedCall:
8531 pHlp->pfnPrintf(pHlp,
8532 " Call #%u to %s (%u args) - %s\n",
8533 idxThreadedCall,
8534 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8535 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
8536 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
8537 idxThreadedCall++;
8538 continue;
8539
8540 case kIemTbDbgEntryType_GuestRegShadowing:
8541 {
8542 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8543 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
8544 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
8545 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
8546 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8547 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
8548 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
8549 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
8550 else
8551 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
8552 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
8553 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
8554 continue;
8555 }
8556
8557#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8558 case kIemTbDbgEntryType_GuestSimdRegShadowing:
8559 {
8560 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
8561 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
8562 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
8563 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
8564 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8565 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
8566 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
8567 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
8568 else
8569 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
8570 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
8571 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
8572 continue;
8573 }
8574#endif
8575
8576 case kIemTbDbgEntryType_Label:
8577 {
8578 const char *pszName = "what_the_fudge";
8579 const char *pszComment = "";
8580 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
8581 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
8582 {
8583 case kIemNativeLabelType_Return: pszName = "Return"; break;
8584 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
8585 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
8586 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
8587 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
8588 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
8589 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
8590 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
8591 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
8592 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
8593 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
8594 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
8595 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
8596 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
8597 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
8598 case kIemNativeLabelType_If:
8599 pszName = "If";
8600 fNumbered = true;
8601 break;
8602 case kIemNativeLabelType_Else:
8603 pszName = "Else";
8604 fNumbered = true;
8605 pszComment = " ; regs state restored pre-if-block";
8606 break;
8607 case kIemNativeLabelType_Endif:
8608 pszName = "Endif";
8609 fNumbered = true;
8610 break;
8611 case kIemNativeLabelType_CheckIrq:
8612 pszName = "CheckIrq_CheckVM";
8613 fNumbered = true;
8614 break;
8615 case kIemNativeLabelType_TlbLookup:
8616 pszName = "TlbLookup";
8617 fNumbered = true;
8618 break;
8619 case kIemNativeLabelType_TlbMiss:
8620 pszName = "TlbMiss";
8621 fNumbered = true;
8622 break;
8623 case kIemNativeLabelType_TlbDone:
8624 pszName = "TlbDone";
8625 fNumbered = true;
8626 break;
8627 case kIemNativeLabelType_Invalid:
8628 case kIemNativeLabelType_End:
8629 break;
8630 }
8631 if (fNumbered)
8632 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
8633 else
8634 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
8635 continue;
8636 }
8637
8638 case kIemTbDbgEntryType_NativeOffset:
8639 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
8640 Assert(offDbgNativeNext > offNative);
8641 break;
8642
8643#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8644 case kIemTbDbgEntryType_DelayedPcUpdate:
8645 pHlp->pfnPrintf(pHlp,
8646 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
8647 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
8648 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
8649 continue;
8650#endif
8651
8652 default:
8653 AssertFailed();
8654 }
8655 iDbgEntry++;
8656 break;
8657 }
8658 }
8659
8660 /*
8661 * Disassemble the next native instruction.
8662 */
8663 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8664# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8665 uint32_t cbInstr = sizeof(paNative[0]);
8666 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8667 if (RT_SUCCESS(rc))
8668 {
8669# if defined(RT_ARCH_AMD64)
8670 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8671 {
8672 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8673 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8674 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8675 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8676 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8677 uInfo & 0x8000 ? "recompiled" : "todo");
8678 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8679 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8680 else
8681 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8682 }
8683 else
8684# endif
8685 {
8686 const char *pszAnnotation = NULL;
8687# ifdef RT_ARCH_AMD64
8688 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8689 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8690 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8691 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8692 PCDISOPPARAM pMemOp;
8693 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
8694 pMemOp = &Dis.Param1;
8695 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
8696 pMemOp = &Dis.Param2;
8697 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
8698 pMemOp = &Dis.Param3;
8699 else
8700 pMemOp = NULL;
8701 if ( pMemOp
8702 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
8703 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
8704 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
8705 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
8706
8707#elif defined(RT_ARCH_ARM64)
8708 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8709 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8710 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8711# else
8712# error "Port me"
8713# endif
8714 if (pszAnnotation)
8715 {
8716 static unsigned const s_offAnnotation = 55;
8717 size_t const cchAnnotation = strlen(pszAnnotation);
8718 size_t cchDis = strlen(szDisBuf);
8719 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
8720 {
8721 if (cchDis < s_offAnnotation)
8722 {
8723 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
8724 cchDis = s_offAnnotation;
8725 }
8726 szDisBuf[cchDis++] = ' ';
8727 szDisBuf[cchDis++] = ';';
8728 szDisBuf[cchDis++] = ' ';
8729 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
8730 }
8731 }
8732 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8733 }
8734 }
8735 else
8736 {
8737# if defined(RT_ARCH_AMD64)
8738 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8739 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8740# elif defined(RT_ARCH_ARM64)
8741 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8742# else
8743# error "Port me"
8744# endif
8745 cbInstr = sizeof(paNative[0]);
8746 }
8747 offNative += cbInstr / sizeof(paNative[0]);
8748
8749# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8750 cs_insn *pInstr;
8751 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8752 (uintptr_t)pNativeCur, 1, &pInstr);
8753 if (cInstrs > 0)
8754 {
8755 Assert(cInstrs == 1);
8756 const char *pszAnnotation = NULL;
8757# if defined(RT_ARCH_ARM64)
8758 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
8759 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
8760 {
8761                /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
8762 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
8763 char *psz = strchr(pInstr->op_str, '[');
8764 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
8765 {
8766                    uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
8767 int32_t off = -1;
8768 psz += 4;
8769 if (*psz == ']')
8770 off = 0;
8771 else if (*psz == ',')
8772 {
8773 psz = RTStrStripL(psz + 1);
8774 if (*psz == '#')
8775 off = RTStrToInt32(&psz[1]);
8776 /** @todo deal with index registers and LSL as well... */
8777 }
8778 if (off >= 0)
8779 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
8780 }
8781 }
8782# endif
8783
8784 size_t const cchOp = strlen(pInstr->op_str);
8785# if defined(RT_ARCH_AMD64)
8786 if (pszAnnotation)
8787 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
8788 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
8789 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8790 else
8791 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8792 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8793
8794# else
8795 if (pszAnnotation)
8796 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
8797 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
8798 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
8799 else
8800 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8801 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8802# endif
8803 offNative += pInstr->size / sizeof(*pNativeCur);
8804 cs_free(pInstr, cInstrs);
8805 }
8806 else
8807 {
8808# if defined(RT_ARCH_AMD64)
8809 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8810                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8811# else
8812 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8813# endif
8814 offNative++;
8815 }
8816# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8817 }
8818 }
8819 else
8820#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
8821 {
8822 /*
8823 * No debug info, just disassemble the x86 code and then the native code.
8824 *
8825 * First the guest code:
8826 */
8827 for (unsigned i = 0; i < pTb->cRanges; i++)
8828 {
8829 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
8830 + (pTb->aRanges[i].idxPhysPage == 0
8831 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
8832 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
8833 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
8834 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
8835 unsigned off = pTb->aRanges[i].offOpcodes;
8836 /** @todo this ain't working when crossing pages! */
8837 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
8838 while (off < cbOpcodes)
8839 {
8840 uint32_t cbInstr = 1;
8841 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
8842 &pTb->pabOpcodes[off], cbOpcodes - off,
8843 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
8844 if (RT_SUCCESS(rc))
8845 {
8846 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8847 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8848 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8849 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8850 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
8851 GCPhysPc += cbInstr;
8852 off += cbInstr;
8853 }
8854 else
8855 {
8856 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
8857 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
8858 break;
8859 }
8860 }
8861 }
8862
8863 /*
8864 * Then the native code:
8865 */
8866 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
8867 while (offNative < cNative)
8868 {
8869 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
8870# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8871 uint32_t cbInstr = sizeof(paNative[0]);
8872 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
8873 if (RT_SUCCESS(rc))
8874 {
8875# if defined(RT_ARCH_AMD64)
8876 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
8877 {
8878 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
8879 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
8880 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
8881 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
8882 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
8883 uInfo & 0x8000 ? "recompiled" : "todo");
8884 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
8885 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
8886 else
8887 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
8888 }
8889 else
8890# endif
8891 {
8892# ifdef RT_ARCH_AMD64
8893 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
8894 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
8895 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8896 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8897# elif defined(RT_ARCH_ARM64)
8898 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
8899 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
8900 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
8901# else
8902# error "Port me"
8903# endif
8904 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
8905 }
8906 }
8907 else
8908 {
8909# if defined(RT_ARCH_AMD64)
8910 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
8911 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
8912# else
8913 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
8914# endif
8915 cbInstr = sizeof(paNative[0]);
8916 }
8917 offNative += cbInstr / sizeof(paNative[0]);
8918
8919# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8920 cs_insn *pInstr;
8921 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
8922 (uintptr_t)pNativeCur, 1, &pInstr);
8923 if (cInstrs > 0)
8924 {
8925 Assert(cInstrs == 1);
8926# if defined(RT_ARCH_AMD64)
8927 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
8928 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
8929# else
8930 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
8931 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
8932# endif
8933 offNative += pInstr->size / sizeof(*pNativeCur);
8934 cs_free(pInstr, cInstrs);
8935 }
8936 else
8937 {
8938# if defined(RT_ARCH_AMD64)
8939 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
8940                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
8941# else
8942 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
8943# endif
8944 offNative++;
8945 }
8946# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
8947 }
8948 }
8949
8950#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
8951 /* Cleanup. */
8952 cs_close(&hDisasm);
8953#endif
8954}
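#if 0 /* Illustrative sketch, not part of the original source and kept out of the
         build: how the 7-byte marker NOP payload recognized above is unpacked.
         The layout mirrors the checks in iemNativeDisassembleTb: the 32-bit
         immediate sits at byte offset 3, its high word selects the threaded
         function, bit 15 flags "recompiled" and the low 15 bits carry the call
         number.  The helper name is made up. */
static void exampleDecodeMarkerNop(uint8_t const *pbNop /* points at the 7-byte marker nop */)
{
    uint32_t uInfo;
    memcpy(&uInfo, &pbNop[3], sizeof(uInfo));               /* the nop's 32-bit immediate */
    uint16_t const idxThreadedFn = RT_HIWORD(uInfo);        /* threaded function index */
    bool     const fRecompiled   = RT_BOOL(uInfo & 0x8000); /* natively recompiled? */
    uint16_t const idxCall       = uInfo & 0x7fff;          /* call number within the TB */
    RT_NOREF(idxThreadedFn, fRecompiled, idxCall);
}
#endif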
8955
8956
8957/**
8958 * Recompiles the given threaded TB into a native one.
8959 *
8960 * In case of failure the translation block will be returned as-is.
8961 *
8962 * @returns pTb.
8963 * @param pVCpu The cross context virtual CPU structure of the calling
8964 * thread.
8965 * @param   pTb         The threaded translation block to recompile to native.
8966 */
8967DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
8968{
8969 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
8970
8971 /*
8972     * The first time through, we allocate the recompiler state; on subsequent
8973     * calls we just need to reset it before using it again.
8974 */
8975 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
8976 if (RT_LIKELY(pReNative))
8977 iemNativeReInit(pReNative, pTb);
8978 else
8979 {
8980 pReNative = iemNativeInit(pVCpu, pTb);
8981 AssertReturn(pReNative, pTb);
8982 }
8983
8984#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8985 /*
8986 * First do liveness analysis. This is done backwards.
8987 */
8988 {
8989 uint32_t idxCall = pTb->Thrd.cCalls;
8990 if (idxCall <= pReNative->cLivenessEntriesAlloc)
8991 { /* likely */ }
8992 else
8993 {
8994 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
8995 while (idxCall > cAlloc)
8996 cAlloc *= 2;
8997 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
8998 AssertReturn(pvNew, pTb);
8999 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9000 pReNative->cLivenessEntriesAlloc = cAlloc;
9001 }
9002 AssertReturn(idxCall > 0, pTb);
9003 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9004
9005 /* The initial (final) entry. */
9006 idxCall--;
9007 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9008
9009        /* Loop backwards through the calls and fill in the other entries. */
9010 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9011 while (idxCall > 0)
9012 {
9013 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9014 if (pfnLiveness)
9015 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9016 else
9017 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9018 pCallEntry--;
9019 idxCall--;
9020 }
9021
9022# ifdef VBOX_WITH_STATISTICS
9023        /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
9024           to 'clobbered' rather than 'input'.  */
9025 /** @todo */
9026# endif
9027 }
9028#endif
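#if 0 /* Illustrative sketch, not part of the original source and kept out of the
         build: the backwards liveness walk above in miniature.  Each entry here is
         a plain bitmask and s_afExampleReads is a made-up per-call "registers read"
         table, whereas the real code uses per-function transfer callbacks
         (g_apfnIemNativeLivenessFunctions). */
    uint64_t              afExampleLive[4];
    static uint64_t const s_afExampleReads[4] = { 0x1, 0x2, 0x6, 0x8 };
    uint32_t              idxExample = RT_ELEMENTS(afExampleLive) - 1;
    afExampleLive[idxExample] = 0;          /* nothing is considered used after the final call */
    while (idxExample > 0)
    {
        /* live-in of the previous entry = live-out of this one plus what this call reads */
        afExampleLive[idxExample - 1] = afExampleLive[idxExample] | s_afExampleReads[idxExample];
        idxExample--;
    }
#endif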
9029
9030 /*
9031     * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9032     * so that we can abort if an error occurs.
9033 */
9034 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9035#ifdef LOG_ENABLED
9036 uint32_t const cCallsOrg = cCallsLeft;
9037#endif
9038 uint32_t off = 0;
9039 int rc = VINF_SUCCESS;
9040 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9041 {
9042 /*
9043 * Emit prolog code (fixed).
9044 */
9045 off = iemNativeEmitProlog(pReNative, off);
9046
9047 /*
9048 * Convert the calls to native code.
9049 */
9050#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9051 int32_t iGstInstr = -1;
9052#endif
9053#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9054 uint32_t cThreadedCalls = 0;
9055 uint32_t cRecompiledCalls = 0;
9056#endif
9057#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9058 uint32_t idxCurCall = 0;
9059#endif
9060 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9061 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9062 while (cCallsLeft-- > 0)
9063 {
9064 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9065#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9066 pReNative->idxCurCall = idxCurCall;
9067#endif
9068
9069 /*
9070 * Debug info, assembly markup and statistics.
9071 */
9072#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9073 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9074 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9075#endif
9076#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9077 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9078 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9079 {
9080 if (iGstInstr < (int32_t)pTb->cInstructions)
9081 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9082 else
9083 Assert(iGstInstr == pTb->cInstructions);
9084 iGstInstr = pCallEntry->idxInstr;
9085 }
9086 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9087#endif
9088#if defined(VBOX_STRICT)
9089 off = iemNativeEmitMarker(pReNative, off,
9090 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9091#endif
9092#if defined(VBOX_STRICT)
9093 iemNativeRegAssertSanity(pReNative);
9094#endif
9095#ifdef VBOX_WITH_STATISTICS
9096 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9097#endif
9098
9099 /*
9100 * Actual work.
9101 */
9102 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9103 pfnRecom ? "(recompiled)" : "(todo)"));
9104 if (pfnRecom) /** @todo stats on this. */
9105 {
9106 off = pfnRecom(pReNative, off, pCallEntry);
9107 STAM_REL_STATS({cRecompiledCalls++;});
9108 }
9109 else
9110 {
9111 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9112 STAM_REL_STATS({cThreadedCalls++;});
9113 }
9114 Assert(off <= pReNative->cInstrBufAlloc);
9115 Assert(pReNative->cCondDepth == 0);
9116
9117#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9118 if (LogIs2Enabled())
9119 {
9120 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9121# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9122 static const char s_achState[] = "CUXI";
9123# else
9124 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9125# endif
9126
9127 char szGpr[17];
9128 for (unsigned i = 0; i < 16; i++)
9129 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9130 szGpr[16] = '\0';
9131
9132 char szSegBase[X86_SREG_COUNT + 1];
9133 char szSegLimit[X86_SREG_COUNT + 1];
9134 char szSegAttrib[X86_SREG_COUNT + 1];
9135 char szSegSel[X86_SREG_COUNT + 1];
9136 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9137 {
9138 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9139 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9140 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9141 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9142 }
9143 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9144 = szSegSel[X86_SREG_COUNT] = '\0';
9145
9146 char szEFlags[8];
9147 for (unsigned i = 0; i < 7; i++)
9148 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9149 szEFlags[7] = '\0';
9150
9151                Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9152 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9153 }
9154#endif
9155
9156 /*
9157 * Advance.
9158 */
9159 pCallEntry++;
9160#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9161 idxCurCall++;
9162#endif
9163 }
9164
9165 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9166 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9167 if (!cThreadedCalls)
9168 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9169
9170 /*
9171 * Emit the epilog code.
9172 */
9173 uint32_t idxReturnLabel;
9174 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9175
9176 /*
9177 * Generate special jump labels.
9178 */
9179 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9180 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9181 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9182 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9183
9184 /*
9185         * Generate simple TB tail labels that just call a helper with a pVCpu
9186         * arg and either return or longjmp/throw a non-zero status.
9187 *
9188 * The array entries must be ordered by enmLabel value so we can index
9189 * using fTailLabels bit numbers.
9190 */
9191 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9192 static struct
9193 {
9194 IEMNATIVELABELTYPE enmLabel;
9195 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9196 } const g_aSimpleTailLabels[] =
9197 {
9198 { kIemNativeLabelType_Invalid, NULL },
9199 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9200 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9201 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9202 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9203 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9204 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9205 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9206 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9207 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9208 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9209 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9210 };
9211 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9212 AssertCompile(kIemNativeLabelType_Invalid == 0);
9213 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9214 if (fTailLabels)
9215 {
9216 do
9217 {
9218 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9219 fTailLabels &= ~RT_BIT_64(enmLabel);
9220 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9221
9222 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9223 Assert(idxLabel != UINT32_MAX);
9224 if (idxLabel != UINT32_MAX)
9225 {
9226 iemNativeLabelDefine(pReNative, idxLabel, off);
9227
9228 /* int pfnCallback(PVMCPUCC pVCpu) */
9229 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9230 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9231
9232 /* jump back to the return sequence. */
9233 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9234 }
9235
9236 } while (fTailLabels);
9237 }
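#if 0 /* Illustrative sketch, not part of the original source and kept out of the
         build: the bit-scan dispatch pattern used just above, reduced to its core.
         Because g_aSimpleTailLabels is ordered by enmLabel value, the 1-based bit
         number returned by ASMBitFirstSetU64 maps straight onto a table index. */
        uint64_t fExampleMask = RT_BIT_64(3) | RT_BIT_64(7);
        while (fExampleMask)
        {
            unsigned const iBit = ASMBitFirstSetU64(fExampleMask) - 1U; /* lowest set bit, 0-based */
            fExampleMask &= ~RT_BIT_64(iBit);                           /* clear it ... */
            /* ... and handle the entry it indexes, e.g. g_aSimpleTailLabels[iBit]. */
        }
#endif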
9238 }
9239 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9240 {
9241 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9242 return pTb;
9243 }
9244 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9245 Assert(off <= pReNative->cInstrBufAlloc);
9246
9247 /*
9248     * Make sure all labels have been defined.
9249 */
9250 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9251#ifdef VBOX_STRICT
9252 uint32_t const cLabels = pReNative->cLabels;
9253 for (uint32_t i = 0; i < cLabels; i++)
9254 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9255#endif
9256
9257 /*
9258 * Allocate executable memory, copy over the code we've generated.
9259 */
9260 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9261 if (pTbAllocator->pDelayedFreeHead)
9262 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9263
9264 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9265 AssertReturn(paFinalInstrBuf, pTb);
9266 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9267
9268 /*
9269 * Apply fixups.
9270 */
9271 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9272 uint32_t const cFixups = pReNative->cFixups;
9273 for (uint32_t i = 0; i < cFixups; i++)
9274 {
9275 Assert(paFixups[i].off < off);
9276 Assert(paFixups[i].idxLabel < cLabels);
9277 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9278 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9279 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9280 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9281 switch (paFixups[i].enmType)
9282 {
9283#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9284 case kIemNativeFixupType_Rel32:
9285 Assert(paFixups[i].off + 4 <= off);
9286 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9287 continue;
9288
9289#elif defined(RT_ARCH_ARM64)
9290 case kIemNativeFixupType_RelImm26At0:
9291 {
9292 Assert(paFixups[i].off < off);
9293 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9294 Assert(offDisp >= -262144 && offDisp < 262144);
9295 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9296 continue;
9297 }
9298
9299 case kIemNativeFixupType_RelImm19At5:
9300 {
9301 Assert(paFixups[i].off < off);
9302 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9303 Assert(offDisp >= -262144 && offDisp < 262144);
9304 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9305 continue;
9306 }
9307
9308 case kIemNativeFixupType_RelImm14At5:
9309 {
9310 Assert(paFixups[i].off < off);
9311 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9312 Assert(offDisp >= -8192 && offDisp < 8192);
9313 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9314 continue;
9315 }
9316
9317#endif
9318 case kIemNativeFixupType_Invalid:
9319 case kIemNativeFixupType_End:
9320 break;
9321 }
9322 AssertFailed();
9323 }
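#if 0 /* Illustrative sketch, not part of the original source and kept out of the
         build: the ARM64 RelImm26At0 case above in isolation.  The displacement is
         counted in 32-bit instruction words and masked into the low 26 bits of an
         already-emitted B instruction; the opcode bits stay untouched. */
    uint32_t      uExampleInstr  = UINT32_C(0x14000000);      /* unconditional B, imm26 = 0 */
    int32_t const offExampleDisp = 16;                        /* branch 16 instructions forward */
    uExampleInstr = (uExampleInstr & UINT32_C(0xfc000000))    /* keep the opcode bits */
                  | ((uint32_t)offExampleDisp & UINT32_C(0x03ffffff)); /* insert the signed imm26 */
#endif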
9324
9325 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9326 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9327
9328 /*
9329 * Convert the translation block.
9330 */
9331 RTMemFree(pTb->Thrd.paCalls);
9332 pTb->Native.paInstructions = paFinalInstrBuf;
9333 pTb->Native.cInstructions = off;
9334 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9335#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9336    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9337 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9338#endif
9339
9340 Assert(pTbAllocator->cThreadedTbs > 0);
9341 pTbAllocator->cThreadedTbs -= 1;
9342 pTbAllocator->cNativeTbs += 1;
9343 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9344
9345#ifdef LOG_ENABLED
9346 /*
9347 * Disassemble to the log if enabled.
9348 */
9349 if (LogIs3Enabled())
9350 {
9351 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9352 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9353# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9354 RTLogFlush(NULL);
9355# endif
9356 }
9357#endif
9358 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9359
9360 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9361 return pTb;
9362}
9363