VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@104034

Last change on this file since 104034 was 104034, checked in by vboxsync, 8 months ago

VMM/IEM: Implement experimental (disabled by default) delaying writeback of dirty guest registers (for GPRs only at the moment), bugref:10629

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 438.6 KB
1/* $Id: IEMAllN8veRecompiler.cpp 104034 2024-03-25 10:07:26Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using the RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
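/** @note Worked example of the granularity above (128 byte units, shift 7), a
 *        small sketch rather than anything normative; requests are rounded up
 *        to whole units exactly like iemExecMemAllocatorAllocInChunk does below.
 * @code
 *      uint32_t const cbReq     = 1000;   // hypothetical request size
 *      uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
 *                               >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
 *      // cReqUnits == 8, i.e. 8 * 128 = 1024 bytes get marked in the bitmap.
 * @endcode */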
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
263/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity/laziness, they are allocated as one
339 * continuous chunk. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside within 32-bit RVA distance of the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
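/*
 * A minimal standalone sketch of the same first-fit idea as above, restricted
 * to a single 64-bit bitmap word and plain C so it can be read (or unit tested)
 * without the IPRT bit operations.  Illustration only; the name is made up.
 */
#if 0 /* illustration */
static int iemExecMemExampleFindClearRun(uint64_t bmAlloc, unsigned cReqUnits)
{
    for (unsigned iBit = 0; iBit + cReqUnits <= 64; iBit++)
    {
        unsigned cRun = 0;
        while (cRun < cReqUnits && !(bmAlloc & (UINT64_C(1) << (iBit + cRun))))
            cRun++;
        if (cRun == cReqUnits)
            return (int)iBit;           /* first index of a large enough clear run */
        iBit += cRun;                   /* skip past the set bit that stopped us */
    }
    return -1;                          /* no run of cReqUnits clear bits */
}
#endif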
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
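/** @note Rough usage sketch for the three allocator entry points around here
 *        (error handling and the real TB plumbing omitted; this is just the
 *        intended call order, not how the recompiler itself is wired up):
 * @code
 *      uint32_t const cbNeeded = 4096;                                // hypothetical size
 *      uint8_t *pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbNeeded);
 *      if (pbCode)
 *      {
 *          // ... emit native instructions into pbCode while it is RW ...
 *          iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbNeeded);   // RX + icache flush on darwin
 *          // ... execute the code; later, when the TB is retired: ...
 *          iemExecMemAllocatorFree(pVCpu, pbCode, cbNeeded);
 *      }
 * @endcode */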
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
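/* A few worked encodings from the two emitters above (low 7 bits first, the
   high bit marks continuation; the signed variant folds the sign into bit 6 of
   the last byte):
        iemDwarfPutUleb128(Ptr,  16)  ->  0x10
        iemDwarfPutUleb128(Ptr, 300)  ->  0xac 0x02
        iemDwarfPutLeb128( Ptr,  -8)  ->  0x78
        iemDwarfPutLeb128( Ptr, 127)  ->  0xff 0x00                             */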
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
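/* Byte-level example of the two CFA emitters above, assuming the standard
   AMD64 DWARF register numbering (6 = RBP) and the data alignment factor of -8
   used by the CIE further down:
        iemDwarfPutCfaDefCfa(Ptr, 6, 16)  ->  0x0c 0x06 0x10   "CFA = RBP + 16"
        iemDwarfPutCfaOffset(Ptr, 6, 2)   ->  0x86 0x02        "RBP = [CFA + 2 * -8]"   */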
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
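/* How the string table fills up with the appends below, as a worked example:
   APPEND_STR("") places the mandatory empty name at offset 0 (offStrTab = 1),
   ".eh_frame" then lands at offset 1 (offStrTab = 11), ".shstrtab" at offset 11
   (offStrTab = 21), and so on; each sh_name records the offset taken just
   before the corresponding append. */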
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
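        /* Worked example of the adjustment described above (32 byte block header,
           64 byte goal), matching the exploratory allocation just below: a 256 byte
           request becomes RT_ALIGN_32(256 + 32, 64) - 32 = 320 - 32 = 288 bytes, so
           the user area (288) plus the next block's header (32) ends exactly on the
           next 64 byte boundary and the next user area comes out 64 byte aligned. */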
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
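    /* Worked example of the sizing logic above (numbers purely illustrative):
       cbMax = 40 MiB takes the middle branch, so cbChunk = 40 MiB / 4 = 10 MiB,
       which is not a power of two and is rounded up to 16 MiB; cbMax is then
       rounded up to a whole number of chunks (48 MiB), giving cMaxChunks = 3. */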
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
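    /* The allocation bitmap has one bit per allocation unit; the extra +3 in the shift converts bits to bytes. */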
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
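    /* pbmAlloc is an array of 64-bit words, each covering 64 allocation units, hence the additional +6 in the shift. */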
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
1643 *
1644 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
1645 */
1646IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
1647{
1648 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
1649 iemRaiseSimdFpExceptionJmp(pVCpu);
1650 else
1651 iemRaiseUndefinedOpcodeJmp(pVCpu);
1652#ifndef _MSC_VER
1653 return VINF_IEM_RAISED_XCPT; /* not reached */
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code when it wants to raise a \#NM.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1662{
1663 iemRaiseDeviceNotAvailableJmp(pVCpu);
1664#ifndef _MSC_VER
1665 return VINF_IEM_RAISED_XCPT; /* not reached */
1666#endif
1667}
1668
1669
1670/**
1671 * Used by TB code when it wants to raise a \#GP(0).
1672 */
1673IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1674{
1675 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1676#ifndef _MSC_VER
1677 return VINF_IEM_RAISED_XCPT; /* not reached */
1678#endif
1679}
1680
1681
1682/**
1683 * Used by TB code when it wants to raise a \#MF.
1684 */
1685IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1686{
1687 iemRaiseMathFaultJmp(pVCpu);
1688#ifndef _MSC_VER
1689 return VINF_IEM_RAISED_XCPT; /* not reached */
1690#endif
1691}
1692
1693
1694/**
1695 * Used by TB code when it wants to raise a \#XF.
1696 */
1697IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1698{
1699 iemRaiseSimdFpExceptionJmp(pVCpu);
1700#ifndef _MSC_VER
1701 return VINF_IEM_RAISED_XCPT; /* not reached */
1702#endif
1703}
1704
1705
1706/**
1707 * Used by TB code when detecting opcode changes.
1708 * @see iemThreadeFuncWorkerObsoleteTb
1709 */
1710IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1711{
1712 /* We set fSafeToFree to false because we're being called in the context
1713 of a TB callback function, which for native TBs means we cannot release
1714 the executable memory until we've returned all the way back to iemTbExec,
1715 as that return path goes via the native code generated for the TB. */
1716 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1717 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1718 return VINF_IEM_REEXEC_BREAK;
1719}
1720
1721
1722/**
1723 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1724 */
1725IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1726{
1727 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1728 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1729 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1730 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1731 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1732 return VINF_IEM_REEXEC_BREAK;
1733}
1734
1735
1736/**
1737 * Used by TB code when we missed a PC check after a branch.
1738 */
1739IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1740{
1741 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1742 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1743 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1744 pVCpu->iem.s.pbInstrBuf));
1745 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1746 return VINF_IEM_REEXEC_BREAK;
1747}
1748
1749
1750
1751/*********************************************************************************************************************************
1752* Helpers: Segmented memory fetches and stores. *
1753*********************************************************************************************************************************/
1754
1755/**
1756 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1757 */
1758IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1759{
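    /* Note: when IEMNATIVE_WITH_TLB_LOOKUP_FETCH is defined, the TB code presumably
       performs the TLB lookup inline and only calls this helper as the fallback path,
       hence the out-of-line 'Safe' worker; the same pattern applies to the other
       fetch/store/map helpers below. */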
1760#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1761 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1762#else
1763 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1764#endif
1765}
1766
1767
1768/**
1769 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1770 * to 16 bits.
1771 */
1772IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1773{
1774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1775 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1776#else
1777 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1778#endif
1779}
1780
1781
1782/**
1783 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1784 * to 32 bits.
1785 */
1786IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1787{
1788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1789 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1790#else
1791 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1792#endif
1793}
1794
1795/**
1796 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1797 * to 64 bits.
1798 */
1799IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1800{
1801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1802 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1803#else
1804 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1805#endif
1806}
1807
1808
1809/**
1810 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1824 * to 32 bits.
1825 */
1826IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1827{
1828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1829 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1830#else
1831 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1832#endif
1833}
1834
1835
1836/**
1837 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1838 * to 64 bits.
1839 */
1840IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1841{
1842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1843 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1844#else
1845 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1846#endif
1847}
1848
1849
1850/**
1851 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1852 */
1853IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1854{
1855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1856 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1857#else
1858 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1859#endif
1860}
1861
1862
1863/**
1864 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1865 * to 64 bits.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1870 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1871#else
1872 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1883 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1884#else
1885 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1886#endif
1887}
1888
1889
1890#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1891/**
1892 * Used by TB code to load 128-bit data w/ segmentation.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1897 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1898#else
1899 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to load 128-bit data w/ segmentation, with SSE alignment checking.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1910 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1911#else
1912 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to load 128-bit data w/ segmentation, without alignment checking.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1923 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1924#else
1925 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to load 256-bit data w/ segmentation, without alignment checking.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1936 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1937#else
1938 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to load 256-bit data w/ segmentation, with AVX alignment checking.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1947{
1948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1949 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1950#else
1951 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1952#endif
1953}
1954#endif
1955
1956
1957/**
1958 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1963 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1964#else
1965 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1976 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1977#else
1978 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1989 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1990#else
1991 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1998 */
1999IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
2000{
2001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2002 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
2003#else
2004 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
2005#endif
2006}
2007
2008
2009#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2010/**
2011 * Used by TB code to store unsigned 128-bit data w/ segmentation, with SSE alignment checking.
2012 */
2013IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2014{
2015#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2016 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2017#else
2018 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2019#endif
2020}
2021
2022
2023/**
2024 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checking.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2029 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2030#else
2031 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checking.
2038 */
2039IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2040{
2041#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2042 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2043#else
2044 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2045#endif
2046}
2047
2048
2049/**
2050 * Used by TB code to store unsigned 256-bit data w/ segmentation, with AVX alignment checking.
2051 */
2052IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2053{
2054#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2055 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2056#else
2057 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2058#endif
2059}
2060#endif
2061
2062
2063
2064/**
2065 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
2066 */
2067IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2068{
2069#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2070 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2071#else
2072 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2073#endif
2074}
2075
2076
2077/**
2078 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
2079 */
2080IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2081{
2082#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2083 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2084#else
2085 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2086#endif
2087}
2088
2089
2090/**
2091 * Used by TB code to store a 32-bit selector value onto a generic stack.
2092 *
2093 * Intel CPUs don't write a whole dword here, thus the special function.
2094 */
2095IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2096{
2097#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2098 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2099#else
2100 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2101#endif
2102}
2103
2104
2105/**
2106 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
2107 */
2108IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2109{
2110#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2111 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2112#else
2113 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2114#endif
2115}
2116
2117
2118/**
2119 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2120 */
2121IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2122{
2123#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2124 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2125#else
2126 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2127#endif
2128}
2129
2130
2131/**
2132 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2133 */
2134IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2135{
2136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2137 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2138#else
2139 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2140#endif
2141}
2142
2143
2144/**
2145 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2146 */
2147IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2148{
2149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2150 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2151#else
2152 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2153#endif
2154}
2155
2156
2157
2158/*********************************************************************************************************************************
2159* Helpers: Flat memory fetches and stores. *
2160*********************************************************************************************************************************/
2161
2162/**
2163 * Used by TB code to load unsigned 8-bit data w/ flat address.
2164 * @note Zero extending the value to 64-bit to simplify assembly.
2165 */
2166IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2167{
2168#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2169 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2170#else
2171 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2172#endif
2173}
2174
2175
2176/**
2177 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2178 * to 16 bits.
2179 * @note Zero extending the value to 64-bit to simplify assembly.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2182{
2183#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2184 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2185#else
2186 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2187#endif
2188}
2189
2190
2191/**
2192 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2193 * to 32 bits.
2194 * @note Zero extending the value to 64-bit to simplify assembly.
2195 */
2196IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2197{
2198#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2199 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2200#else
2201 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2202#endif
2203}
2204
2205
2206/**
2207 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2208 * to 64 bits.
2209 */
2210IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2211{
2212#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2213 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2214#else
2215 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2216#endif
2217}
2218
2219
2220/**
2221 * Used by TB code to load unsigned 16-bit data w/ flat address.
2222 * @note Zero extending the value to 64-bit to simplify assembly.
2223 */
2224IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2225{
2226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2227 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2228#else
2229 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2230#endif
2231}
2232
2233
2234/**
2235 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2236 * to 32 bits.
2237 * @note Zero extending the value to 64-bit to simplify assembly.
2238 */
2239IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2240{
2241#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2242 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2243#else
2244 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2245#endif
2246}
2247
2248
2249/**
2250 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2251 * to 64 bits.
2252 * @note Zero extending the value to 64-bit to simplify assembly.
2253 */
2254IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2255{
2256#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2257 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2258#else
2259 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2260#endif
2261}
2262
2263
2264/**
2265 * Used by TB code to load unsigned 32-bit data w/ flat address.
2266 * @note Zero extending the value to 64-bit to simplify assembly.
2267 */
2268IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2269{
2270#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2271 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2272#else
2273 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2274#endif
2275}
2276
2277
2278/**
2279 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2280 * to 64 bits.
2281 * @note Zero extending the value to 64-bit to simplify assembly.
2282 */
2283IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2284{
2285#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2286 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2287#else
2288 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2289#endif
2290}
2291
2292
2293/**
2294 * Used by TB code to load unsigned 64-bit data w/ flat address.
2295 */
2296IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2297{
2298#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2299 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2300#else
2301 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2302#endif
2303}
2304
2305
2306#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2307/**
2308 * Used by TB code to load unsigned 128-bit data w/ flat address.
2309 */
2310IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2311{
2312#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2313 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2314#else
2315 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2316#endif
2317}
2318
2319
2320/**
2321 * Used by TB code to load unsigned 128-bit data w/ flat address, with SSE alignment checking.
2322 */
2323IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2324{
2325#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2326 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2327#else
2328 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2329#endif
2330}
2331
2332
2333/**
2334 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checking.
2335 */
2336IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2337{
2338#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2339 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2340#else
2341 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2342#endif
2343}
2344
2345
2346/**
2347 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checking.
2348 */
2349IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2350{
2351#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2352 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2353#else
2354 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2355#endif
2356}
2357
2358
2359/**
2360 * Used by TB code to load unsigned 256-bit data w/ flat address, with AVX alignment checking.
2361 */
2362IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2363{
2364#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2365 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2366#else
2367 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2368#endif
2369}
2370#endif
2371
2372
2373/**
2374 * Used by TB code to store unsigned 8-bit data w/ flat address.
2375 */
2376IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2377{
2378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2379 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2380#else
2381 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2382#endif
2383}
2384
2385
2386/**
2387 * Used by TB code to store unsigned 16-bit data w/ flat address.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2390{
2391#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2392 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2393#else
2394 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2395#endif
2396}
2397
2398
2399/**
2400 * Used by TB code to store unsigned 32-bit data w/ flat address.
2401 */
2402IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2403{
2404#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2405 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2406#else
2407 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2408#endif
2409}
2410
2411
2412/**
2413 * Used by TB code to store unsigned 64-bit data w/ flat address.
2414 */
2415IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2416{
2417#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2418 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2419#else
2420 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2421#endif
2422}
2423
2424
2425#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2426/**
2427 * Used by TB code to store unsigned 128-bit data w/ flat address, with SSE alignment checking.
2428 */
2429IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2432 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2433#else
2434 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checking.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2445 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2446#else
2447 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checking.
2454 */
2455IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2456{
2457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2458 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2459#else
2460 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
2461#endif
2462}
2463
2464
2465/**
2466 * Used by TB code to store unsigned 256-bit data w/ flat address, with AVX alignment checking.
2467 */
2468IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2469{
2470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2471 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2472#else
2473 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
2474#endif
2475}
2476#endif
2477
2478
2479
2480/**
2481 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2482 */
2483IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2484{
2485#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2486 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2487#else
2488 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2489#endif
2490}
2491
2492
2493/**
2494 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2495 */
2496IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2497{
2498#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2499 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2500#else
2501 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2502#endif
2503}
2504
2505
2506/**
2507 * Used by TB code to store a segment selector value onto a flat stack.
2508 *
2509 * Intel CPUs don't write a whole dword here, thus the special function.
2510 */
2511IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2512{
2513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2514 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2515#else
2516 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2517#endif
2518}
2519
2520
2521/**
2522 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2523 */
2524IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2525{
2526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2527 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2528#else
2529 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2530#endif
2531}
2532
2533
2534/**
2535 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2536 */
2537IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2538{
2539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2540 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2541#else
2542 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2543#endif
2544}
2545
2546
2547/**
2548 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2551{
2552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2553 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2554#else
2555 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2556#endif
2557}
2558
2559
2560/**
2561 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2562 */
2563IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2564{
2565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2566 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2567#else
2568 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2569#endif
2570}
2571
2572
2573
2574/*********************************************************************************************************************************
2575* Helpers: Segmented memory mapping. *
2576*********************************************************************************************************************************/
2577
2578/**
2579 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2580 * segmentation.
2581 */
2582IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2583 RTGCPTR GCPtrMem, uint8_t iSegReg))
2584{
2585#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2586 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2587#else
2588 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2589#endif
2590}
2591
2592
2593/**
2594 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2595 */
2596IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2597 RTGCPTR GCPtrMem, uint8_t iSegReg))
2598{
2599#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2600 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2601#else
2602 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2603#endif
2604}
2605
2606
2607/**
2608 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2609 */
2610IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2611 RTGCPTR GCPtrMem, uint8_t iSegReg))
2612{
2613#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2614 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2615#else
2616 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2617#endif
2618}
2619
2620
2621/**
2622 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2623 */
2624IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2625 RTGCPTR GCPtrMem, uint8_t iSegReg))
2626{
2627#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2628 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2629#else
2630 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2631#endif
2632}
2633
2634
2635/**
2636 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2637 * segmentation.
2638 */
2639IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2640 RTGCPTR GCPtrMem, uint8_t iSegReg))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2644#else
2645 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2654 RTGCPTR GCPtrMem, uint8_t iSegReg))
2655{
2656#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2657 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2658#else
2659 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2660#endif
2661}
2662
2663
2664/**
2665 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2666 */
2667IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2668 RTGCPTR GCPtrMem, uint8_t iSegReg))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2672#else
2673 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2682 RTGCPTR GCPtrMem, uint8_t iSegReg))
2683{
2684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2685 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2686#else
2687 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2688#endif
2689}
2690
2691
2692/**
2693 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2694 * segmentation.
2695 */
2696IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2697 RTGCPTR GCPtrMem, uint8_t iSegReg))
2698{
2699#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2700 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2701#else
2702 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2703#endif
2704}
2705
2706
2707/**
2708 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2709 */
2710IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2711 RTGCPTR GCPtrMem, uint8_t iSegReg))
2712{
2713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2714 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2715#else
2716 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2717#endif
2718}
2719
2720
2721/**
2722 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2723 */
2724IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2725 RTGCPTR GCPtrMem, uint8_t iSegReg))
2726{
2727#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2728 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2729#else
2730 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2731#endif
2732}
2733
2734
2735/**
2736 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2737 */
2738IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2739 RTGCPTR GCPtrMem, uint8_t iSegReg))
2740{
2741#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2742 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2743#else
2744 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2745#endif
2746}
2747
2748
2749/**
2750 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2751 * segmentation.
2752 */
2753IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2754 RTGCPTR GCPtrMem, uint8_t iSegReg))
2755{
2756#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2757 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2758#else
2759 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2760#endif
2761}
2762
2763
2764/**
2765 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2766 */
2767IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2768 RTGCPTR GCPtrMem, uint8_t iSegReg))
2769{
2770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2771 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2772#else
2773 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2774#endif
2775}
2776
2777
2778/**
2779 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2780 */
2781IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2782 RTGCPTR GCPtrMem, uint8_t iSegReg))
2783{
2784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2785 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2786#else
2787 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2788#endif
2789}
2790
2791
2792/**
2793 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2794 */
2795IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2796 RTGCPTR GCPtrMem, uint8_t iSegReg))
2797{
2798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2799 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2800#else
2801 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2802#endif
2803}
2804
2805
2806/**
2807 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2808 */
2809IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2810 RTGCPTR GCPtrMem, uint8_t iSegReg))
2811{
2812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2813 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2814#else
2815 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2816#endif
2817}
2818
2819
2820/**
2821 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2822 */
2823IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2824 RTGCPTR GCPtrMem, uint8_t iSegReg))
2825{
2826#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2827 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2828#else
2829 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2830#endif
2831}
2832
2833
2834/**
2835 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2836 * segmentation.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2839 RTGCPTR GCPtrMem, uint8_t iSegReg))
2840{
2841#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2842 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2843#else
2844 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2845#endif
2846}
2847
2848
2849/**
2850 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2851 */
2852IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2853 RTGCPTR GCPtrMem, uint8_t iSegReg))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2857#else
2858 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2867 RTGCPTR GCPtrMem, uint8_t iSegReg))
2868{
2869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2870 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2871#else
2872 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2873#endif
2874}
2875
2876
2877/**
2878 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2879 */
2880IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2881 RTGCPTR GCPtrMem, uint8_t iSegReg))
2882{
2883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2884 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2885#else
2886 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2887#endif
2888}
2889
2890
2891/*********************************************************************************************************************************
2892* Helpers: Flat memory mapping. *
2893*********************************************************************************************************************************/
2894
2895/**
2896 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2897 * address.
2898 */
2899IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2900{
2901#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2902 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2903#else
2904 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2905#endif
2906}
2907
2908
2909/**
2910 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2911 */
2912IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2913{
2914#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2915 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2916#else
2917 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2918#endif
2919}
2920
2921
2922/**
2923 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2924 */
2925IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2926{
2927#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2928 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2929#else
2930 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2931#endif
2932}
2933
2934
2935/**
2936 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2937 */
2938IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2939{
2940#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2941 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2942#else
2943 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2944#endif
2945}
2946
2947
2948/**
2949 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2950 * address.
2951 */
2952IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2953{
2954#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2955 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2956#else
2957 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2958#endif
2959}
2960
2961
2962/**
2963 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2964 */
2965IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2966{
2967#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2968 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2969#else
2970 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2971#endif
2972}
2973
2974
2975/**
2976 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2977 */
2978IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2979{
2980#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2981 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2982#else
2983 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2984#endif
2985}
2986
2987
2988/**
2989 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2990 */
2991IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2992{
2993#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2994 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2995#else
2996 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2997#endif
2998}
2999
3000
3001/**
3002 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
3003 * address.
3004 */
3005IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3006{
3007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3008 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3009#else
3010 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3011#endif
3012}
3013
3014
3015/**
3016 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
3017 */
3018IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3019{
3020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3021 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3022#else
3023 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3024#endif
3025}
3026
3027
3028/**
3029 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
3030 */
3031IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3032{
3033#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3034 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3035#else
3036 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3037#endif
3038}
3039
3040
3041/**
3042 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
3043 */
3044IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3045{
3046#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3047 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3048#else
3049 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3050#endif
3051}
3052
3053
3054/**
3055 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
3056 * address.
3057 */
3058IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3059{
3060#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3061 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3062#else
3063 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3064#endif
3065}
3066
3067
3068/**
3069 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
3070 */
3071IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3072{
3073#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3074 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3075#else
3076 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3077#endif
3078}
3079
3080
3081/**
3082 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
3083 */
3084IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3085{
3086#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3087 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3088#else
3089 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3090#endif
3091}
3092
3093
3094/**
3095 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3096 */
3097IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3098{
3099#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3100 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3101#else
3102 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3103#endif
3104}
3105
3106
3107/**
3108 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3109 */
3110IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3111{
3112#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3113 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3114#else
3115 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3116#endif
3117}
3118
3119
3120/**
3121 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3122 */
3123IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3124{
3125#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3126 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3127#else
3128 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3129#endif
3130}
3131
3132
3133/**
3134 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3135 * address.
3136 */
3137IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3138{
3139#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3140 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3141#else
3142 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3143#endif
3144}
3145
3146
3147/**
3148 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3149 */
3150IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3151{
3152#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3153 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3154#else
3155 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3156#endif
3157}
3158
3159
3160/**
3161 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3162 */
3163IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3164{
3165#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3166 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3167#else
3168 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3169#endif
3170}
3171
3172
3173/**
3174 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3175 */
3176IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3177{
3178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3179 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3180#else
3181 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3182#endif
3183}
3184
3185
3186/*********************************************************************************************************************************
3187* Helpers: Commit, rollback & unmap *
3188*********************************************************************************************************************************/
3189
3190/**
3191 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3192 */
3193IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3194{
3195 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3196}
3197
3198
3199/**
3200 * Used by TB code to commit and unmap a read-write memory mapping.
3201 */
3202IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3203{
3204 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3205}
3206
3207
3208/**
3209 * Used by TB code to commit and unmap a write-only memory mapping.
3210 */
3211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3212{
3213 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3214}
3215
3216
3217/**
3218 * Used by TB code to commit and unmap a read-only memory mapping.
3219 */
3220IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3221{
3222 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3223}
3224
3225
3226/**
3227 * Reinitializes the native recompiler state.
3228 *
3229 * Called before starting a new recompile job.
3230 */
3231static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3232{
3233 pReNative->cLabels = 0;
3234 pReNative->bmLabelTypes = 0;
3235 pReNative->cFixups = 0;
3236#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3237 pReNative->pDbgInfo->cEntries = 0;
3238#endif
3239 pReNative->pTbOrg = pTb;
3240 pReNative->cCondDepth = 0;
3241 pReNative->uCondSeqNo = 0;
3242 pReNative->uCheckIrqSeqNo = 0;
3243 pReNative->uTlbSeqNo = 0;
3244
3245#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3246 pReNative->Core.offPc = 0;
3247 pReNative->Core.cInstrPcUpdateSkipped = 0;
3248#endif
3249#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3250 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3251#endif
3252 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3253#if IEMNATIVE_HST_GREG_COUNT < 32
3254 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3255#endif
3256 ;
3257 pReNative->Core.bmHstRegsWithGstShadow = 0;
3258 pReNative->Core.bmGstRegShadows = 0;
3259#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3260 pReNative->Core.bmGstRegShadowDirty = 0;
3261#endif
3262 pReNative->Core.bmVars = 0;
3263 pReNative->Core.bmStack = 0;
3264 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3265 pReNative->Core.u64ArgVars = UINT64_MAX;
3266
3267 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
3268 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3269 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3270 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3271 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3272 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3273 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3274 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3275 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3276 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3277 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3278 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3279 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3280 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3281 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3282 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3283 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3284 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
3285
3286 /* Full host register reinit: */
3287 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3288 {
3289 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3290 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3291 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3292 }
3293
3294 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3295 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3296#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3297 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3298#endif
3299#ifdef IEMNATIVE_REG_FIXED_TMP0
3300 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3301#endif
3302#ifdef IEMNATIVE_REG_FIXED_TMP1
3303 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3304#endif
3305#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3306 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3307#endif
3308 );
3309 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3310 {
3311 fRegs &= ~RT_BIT_32(idxReg);
3312 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3313 }
3314
3315 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3316#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3317 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3318#endif
3319#ifdef IEMNATIVE_REG_FIXED_TMP0
3320 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3321#endif
3322#ifdef IEMNATIVE_REG_FIXED_TMP1
3323 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3324#endif
3325#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3326 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3327#endif
3328
3329#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3330# ifdef RT_ARCH_ARM64
3331 /*
3332 * ARM64 only has 32 128-bit registers. In order to support emulating 256-bit registers we statically
3333 * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3334 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register of each pair as fixed
3335 * here during init and the register allocator assumes it will always be free when the lower one is picked.
3336 */
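    /* Every odd bit of 0xaaaaaaaa is set, i.e. v1, v3, ..., v31 (the upper register of each
       pair) get marked as fixed in addition to IEMNATIVE_SIMD_REG_FIXED_MASK below. */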
3337 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3338# else
3339 uint32_t const fFixedAdditional = 0;
3340# endif
3341
3342 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3343 | fFixedAdditional
3344# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3345 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3346# endif
3347 ;
3348 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3349 pReNative->Core.bmGstSimdRegShadows = 0;
3350 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3351 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3352
3353 /* Full host register reinit: */
3354 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3355 {
3356 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3357 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3358 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3359 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3360 }
3361
3362 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3363 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3364 {
3365 fRegs &= ~RT_BIT_32(idxReg);
3366 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3367 }
3368
3369#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3370 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3371#endif
3372
3373#endif
3374
3375 return pReNative;
3376}
3377
3378
3379/**
3380 * Allocates and initializes the native recompiler state.
3381 *
3382 * This is called the first time an EMT wants to recompile something.
3383 *
3384 * @returns Pointer to the new recompiler state.
3385 * @param pVCpu The cross context virtual CPU structure of the calling
3386 * thread.
3387 * @param pTb The TB that's about to be recompiled.
3388 * @thread EMT(pVCpu)
3389 */
3390static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3391{
3392 VMCPU_ASSERT_EMT(pVCpu);
3393
3394 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3395 AssertReturn(pReNative, NULL);
3396
3397 /*
3398 * Try to allocate all the buffers and stuff we need.
3399 */
3400 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3401 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3402 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3403#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3404 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3405#endif
3406 if (RT_LIKELY( pReNative->pInstrBuf
3407 && pReNative->paLabels
3408 && pReNative->paFixups)
3409#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3410 && pReNative->pDbgInfo
3411#endif
3412 )
3413 {
3414 /*
3415 * Set the buffer & array sizes on success.
3416 */
3417 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3418 pReNative->cLabelsAlloc = _8K;
3419 pReNative->cFixupsAlloc = _16K;
3420#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3421 pReNative->cDbgInfoAlloc = _16K;
3422#endif
3423
3424 /* Other constant stuff: */
3425 pReNative->pVCpu = pVCpu;
3426
3427 /*
3428 * Done, just need to save it and reinit it.
3429 */
3430 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3431 return iemNativeReInit(pReNative, pTb);
3432 }
3433
3434 /*
3435 * Failed. Cleanup and return.
3436 */
3437 AssertFailed();
3438 RTMemFree(pReNative->pInstrBuf);
3439 RTMemFree(pReNative->paLabels);
3440 RTMemFree(pReNative->paFixups);
3441#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3442 RTMemFree(pReNative->pDbgInfo);
3443#endif
3444 RTMemFree(pReNative);
3445 return NULL;
3446}
3447
3448
3449/**
3450 * Creates a label
3451 *
3452 * If the label does not yet have a defined position,
3453 * call iemNativeLabelDefine() later to set it.
3454 *
3455 * @returns Label ID. Throws VBox status code on failure, so no need to check
3456 * the return value.
3457 * @param pReNative The native recompile state.
3458 * @param enmType The label type.
3459 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3460 * label is not yet defined (default).
3461 * @param uData Data associated with the label. Only applicable to
3462 * certain types of labels. Default is zero.
3463 */
3464DECL_HIDDEN_THROW(uint32_t)
3465iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3466 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3467{
3468 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3469
3470 /*
3471 * Locate existing label definition.
3472 *
3473 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3474 * and uData is zero.
3475 */
3476 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3477 uint32_t const cLabels = pReNative->cLabels;
3478 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3479#ifndef VBOX_STRICT
3480 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3481 && offWhere == UINT32_MAX
3482 && uData == 0
3483#endif
3484 )
3485 {
3486#ifndef VBOX_STRICT
3487 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3488 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3489 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3490 if (idxLabel < pReNative->cLabels)
3491 return idxLabel;
3492#else
3493 for (uint32_t i = 0; i < cLabels; i++)
3494 if ( paLabels[i].enmType == enmType
3495 && paLabels[i].uData == uData)
3496 {
3497 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3498 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3499 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3500 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3501 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3502 return i;
3503 }
3504 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3505 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3506#endif
3507 }
3508
3509 /*
3510 * Make sure we've got room for another label.
3511 */
3512 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3513 { /* likely */ }
3514 else
3515 {
3516 uint32_t cNew = pReNative->cLabelsAlloc;
3517 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3518 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3519 cNew *= 2;
3520 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3521 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3522 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3523 pReNative->paLabels = paLabels;
3524 pReNative->cLabelsAlloc = cNew;
3525 }
3526
3527 /*
3528 * Define a new label.
3529 */
3530 paLabels[cLabels].off = offWhere;
3531 paLabels[cLabels].enmType = enmType;
3532 paLabels[cLabels].uData = uData;
3533 pReNative->cLabels = cLabels + 1;
3534
3535 Assert((unsigned)enmType < 64);
3536 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3537
3538 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3539 {
3540 Assert(uData == 0);
3541 pReNative->aidxUniqueLabels[enmType] = cLabels;
3542 }
3543
3544 if (offWhere != UINT32_MAX)
3545 {
3546#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3547 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3548 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3549#endif
3550 }
3551 return cLabels;
3552}
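
/*
 * A typical usage pattern looks roughly like the sketch below (illustrative only; the
 * concrete label type, fixup type and the 4 byte rel32 reservation are assumptions made
 * for the example and depend on the host architecture and call site):
 */
#if 0
    /* Create a forward label; its position is unknown for now (offWhere == UINT32_MAX). */
    uint32_t const idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX, 0 /*uData*/);

    /* After emitting the opcode bytes of a branch, record a fixup for its (here 4 byte rel32)
       displacement so it can be patched once the label position is known. */
    iemNativeAddFixup(pReNative, off, idxLabelElse, kIemNativeFixupType_Rel32, -4);
    off += 4;

    /* ... emit the code executed while the branch is not taken ... */

    /* Bind the label to the current code buffer position. */
    iemNativeLabelDefine(pReNative, idxLabelElse, off);
#endif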
3553
3554
3555/**
3556 * Defines the location of an existing label.
3557 *
3558 * @param pReNative The native recompile state.
3559 * @param idxLabel The label to define.
3560 * @param offWhere The position.
3561 */
3562DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3563{
3564 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3565 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3566 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3567 pLabel->off = offWhere;
3568#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3569 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3570 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3571#endif
3572}
3573
3574
3575/**
3576 * Looks up a label.
3577 *
3578 * @returns Label ID if found, UINT32_MAX if not.
3579 */
3580static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3581 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3582{
3583 Assert((unsigned)enmType < 64);
3584 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3585 {
3586 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3587 return pReNative->aidxUniqueLabels[enmType];
3588
3589 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3590 uint32_t const cLabels = pReNative->cLabels;
3591 for (uint32_t i = 0; i < cLabels; i++)
3592 if ( paLabels[i].enmType == enmType
3593 && paLabels[i].uData == uData
3594 && ( paLabels[i].off == offWhere
3595 || offWhere == UINT32_MAX
3596 || paLabels[i].off == UINT32_MAX))
3597 return i;
3598 }
3599 return UINT32_MAX;
3600}
3601
3602
3603/**
3604 * Adds a fixup.
3605 *
3606 * @throws VBox status code (int) on failure.
3607 * @param pReNative The native recompile state.
3608 * @param offWhere The instruction offset of the fixup location.
3609 * @param idxLabel The target label ID for the fixup.
3610 * @param enmType The fixup type.
3611 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3612 */
3613DECL_HIDDEN_THROW(void)
3614iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3615 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3616{
3617 Assert(idxLabel <= UINT16_MAX);
3618 Assert((unsigned)enmType <= UINT8_MAX);
3619#ifdef RT_ARCH_ARM64
3620 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3621 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3622 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3623#endif
3624
3625 /*
3626 * Make sure we've room.
3627 */
3628 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3629 uint32_t const cFixups = pReNative->cFixups;
3630 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3631 { /* likely */ }
3632 else
3633 {
3634 uint32_t cNew = pReNative->cFixupsAlloc;
3635 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3636 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3637 cNew *= 2;
3638 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3639 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3640 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3641 pReNative->paFixups = paFixups;
3642 pReNative->cFixupsAlloc = cNew;
3643 }
3644
3645 /*
3646 * Add the fixup.
3647 */
3648 paFixups[cFixups].off = offWhere;
3649 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3650 paFixups[cFixups].enmType = enmType;
3651 paFixups[cFixups].offAddend = offAddend;
3652 pReNative->cFixups = cFixups + 1;
3653}
3654
3655
3656/**
3657 * Slow code path for iemNativeInstrBufEnsure.
3658 */
3659DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3660{
3661 /* Double the buffer size till we meet the request. */
3662 uint32_t cNew = pReNative->cInstrBufAlloc;
3663 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3664 do
3665 cNew *= 2;
3666 while (cNew < off + cInstrReq);
3667
3668 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3669#ifdef RT_ARCH_ARM64
3670 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3671#else
3672 uint32_t const cbMaxInstrBuf = _2M;
3673#endif
3674 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3675
3676 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3677 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3678
3679#ifdef VBOX_STRICT
3680 pReNative->offInstrBufChecked = off + cInstrReq;
3681#endif
3682 pReNative->cInstrBufAlloc = cNew;
3683 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3684}
3685
3686#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3687
3688/**
3689 * Grows the static debug info array used during recompilation.
3690 *
3691 * @returns Pointer to the new debug info block; throws VBox status code on
3692 * failure, so no need to check the return value.
3693 */
3694DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3695{
3696 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3697 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3698 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3699 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3700 pReNative->pDbgInfo = pDbgInfo;
3701 pReNative->cDbgInfoAlloc = cNew;
3702 return pDbgInfo;
3703}
3704
3705
3706/**
3707 * Adds a new, uninitialized debug info entry, returning the pointer to it.
3708 */
3709DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3710{
3711 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3712 { /* likely */ }
3713 else
3714 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3715 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3716}
3717
3718
3719/**
3720 * Debug Info: Adds a native offset record, if necessary.
3721 */
3722DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3723{
3724 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3725
3726 /*
3727 * Search backwards to see if we've got a similar record already.
3728 */
3729 uint32_t idx = pDbgInfo->cEntries;
3730 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3731 while (idx-- > idxStop)
3732 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3733 {
3734 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3735 return;
3736 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3737 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3738 break;
3739 }
3740
3741 /*
3742 * Add it.
3743 */
3744 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3745 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3746 pEntry->NativeOffset.offNative = off;
3747}
3748
3749
3750/**
3751 * Debug Info: Record info about a label.
3752 */
3753static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3754{
3755 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3756 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3757 pEntry->Label.uUnused = 0;
3758 pEntry->Label.enmLabel = (uint8_t)enmType;
3759 pEntry->Label.uData = uData;
3760}
3761
3762
3763/**
3764 * Debug Info: Record info about a threaded call.
3765 */
3766static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3767{
3768 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3769 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3770 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3771 pEntry->ThreadedCall.uUnused = 0;
3772 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3773}
3774
3775
3776/**
3777 * Debug Info: Record info about a new guest instruction.
3778 */
3779static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3780{
3781 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3782 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3783 pEntry->GuestInstruction.uUnused = 0;
3784 pEntry->GuestInstruction.fExec = fExec;
3785}
3786
3787
3788/**
3789 * Debug Info: Record info about guest register shadowing.
3790 */
3791DECL_HIDDEN_THROW(void)
3792iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3793 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3794{
3795 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3796 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3797#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3798 pEntry->GuestRegShadowing.fDirty = (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)) ? 1 : 0;
3799#endif
3800 pEntry->GuestRegShadowing.uUnused = 0;
3801 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3802 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3803 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3804#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3805 Assert( idxHstReg != UINT8_MAX
3806 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
3807#endif
3808}
3809
3810
3811# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3812/**
3813 * Debug Info: Record info about guest SIMD register shadowing.
3814 */
3815DECL_HIDDEN_THROW(void)
3816iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3817 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3818{
3819 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3820 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3821 pEntry->GuestSimdRegShadowing.uUnused = 0;
3822 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3823 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3824 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3825}
3826# endif
3827
3828
3829# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3830/**
3831 * Debug Info: Record info about delayed RIP updates.
3832 */
3833DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3834{
3835 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3836 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3837 pEntry->DelayedPcUpdate.offPc = offPc;
3838 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3839}
3840# endif
3841
3842#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3843
3844
3845/*********************************************************************************************************************************
3846* Register Allocator *
3847*********************************************************************************************************************************/
3848
3849/**
3850 * Register parameter indexes (indexed by argument number).
3851 */
3852DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3853{
3854 IEMNATIVE_CALL_ARG0_GREG,
3855 IEMNATIVE_CALL_ARG1_GREG,
3856 IEMNATIVE_CALL_ARG2_GREG,
3857 IEMNATIVE_CALL_ARG3_GREG,
3858#if defined(IEMNATIVE_CALL_ARG4_GREG)
3859 IEMNATIVE_CALL_ARG4_GREG,
3860# if defined(IEMNATIVE_CALL_ARG5_GREG)
3861 IEMNATIVE_CALL_ARG5_GREG,
3862# if defined(IEMNATIVE_CALL_ARG6_GREG)
3863 IEMNATIVE_CALL_ARG6_GREG,
3864# if defined(IEMNATIVE_CALL_ARG7_GREG)
3865 IEMNATIVE_CALL_ARG7_GREG,
3866# endif
3867# endif
3868# endif
3869#endif
3870};
3871AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3872
3873/**
3874 * Call register masks indexed by argument count.
3875 */
3876DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3877{
3878 0,
3879 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3880 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3881 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3882 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3883 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3884#if defined(IEMNATIVE_CALL_ARG4_GREG)
3885 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3886 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3887# if defined(IEMNATIVE_CALL_ARG5_GREG)
3888 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3889 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3890# if defined(IEMNATIVE_CALL_ARG6_GREG)
3891 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3892 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3893 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3894# if defined(IEMNATIVE_CALL_ARG7_GREG)
3895 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3896 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3897 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3898# endif
3899# endif
3900# endif
3901#endif
3902};
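/* E.g. g_afIemNativeCallRegs[3] is the mask of the registers carrying the first three
   call arguments (ARG0 thru ARG2). */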
3903
3904#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3905/**
3906 * BP offset of the stack argument slots.
3907 *
3908 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3909 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3910 */
3911DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3912{
3913 IEMNATIVE_FP_OFF_STACK_ARG0,
3914# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3915 IEMNATIVE_FP_OFF_STACK_ARG1,
3916# endif
3917# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3918 IEMNATIVE_FP_OFF_STACK_ARG2,
3919# endif
3920# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3921 IEMNATIVE_FP_OFF_STACK_ARG3,
3922# endif
3923};
3924AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3925#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3926
3927/**
3928 * Info about shadowed guest register values.
3929 * @see IEMNATIVEGSTREG
3930 */
3931DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3932{
3933#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3934 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3935 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3936 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3937 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3938 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3939 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3940 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3941 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3942 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3943 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3944 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3945 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3946 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3947 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3948 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3949 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3950 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3951 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3952 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3953 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3954 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3955 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3956 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3957 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3958 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3959 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3960 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3961 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3962 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3963 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3964 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3965 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3966 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3967 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3968 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3969 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3970 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3971 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3972 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3973 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3974 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3975 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3976 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3977 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3978 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3979 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3980 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3981 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3982#undef CPUMCTX_OFF_AND_SIZE
3983};
3984AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
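/* For example, the X86_GREG_xAX entry above expands to the byte offset of cpum.GstCtx.rax
   within VMCPU, its size (8 bytes) and the name "rax" used for logging. */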
3985
3986
3987/** Host CPU general purpose register names. */
3988DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3989{
3990#ifdef RT_ARCH_AMD64
3991 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3992#elif defined(RT_ARCH_ARM64)
3993 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3994 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3995#else
3996# error "port me"
3997#endif
3998};
3999
4000
4001#if 0 /* unused */
4002/**
4003 * Tries to locate a suitable register in the given register mask.
4004 *
4005 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4006 * failed.
4007 *
4008 * @returns Host register number on success, returns UINT8_MAX on failure.
4009 */
4010static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
4011{
4012 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4013 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4014 if (fRegs)
4015 {
4016 /** @todo pick better here: */
4017 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
4018
4019 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4020 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4021 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4022 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4023
4024 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4025 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4026 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4027 return idxReg;
4028 }
4029 return UINT8_MAX;
4030}
4031#endif /* unused */
4032
4033
4034#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4035/**
4036 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
4037 *
4038 * @returns New code buffer offset on success, UINT32_MAX on failure.
4039 * @param pReNative The native recompile state.
4040 * @param off The current code buffer position.
4041 * @param enmGstReg The guest register to store to.
4042 * @param idxHstReg The host register to store from.
4043 */
4044DECL_FORCE_INLINE_THROW(uint32_t)
4045iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
4046{
4047 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4048 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4049
4050 switch (g_aGstShadowInfo[enmGstReg].cb)
4051 {
4052 case sizeof(uint64_t):
4053 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4054 case sizeof(uint32_t):
4055 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4056 case sizeof(uint16_t):
4057 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4058#if 0 /* not present in the table. */
4059 case sizeof(uint8_t):
4060 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4061#endif
4062 default:
4063 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4064 }
4065}
4066
4067
4068/**
4069 * Emits code to flush a pending write of the given guest register if any.
4070 *
4071 * @returns New code buffer offset.
4072 * @param pReNative The native recompile state.
4073 * @param off Current code buffer position.
4074 * @param enmGstReg The guest register to flush.
4075 */
4076DECL_HIDDEN_THROW(uint32_t)
4077iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
4078{
4079 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4080
4081 Assert(enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast);
4082 Assert( idxHstReg != UINT8_MAX
4083 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
4084 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s\n",
4085 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4086
4087 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
4088
4089 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
4090 return off;
4091}
4092
4093
4094/**
4095 * Flush the given set of guest registers if marked as dirty.
4096 *
4097 * @returns New code buffer offset.
4098 * @param pReNative The native recompile state.
4099 * @param off Current code buffer position.
4100 * @param fFlushGstReg The guest register set to flush (default is flush everything).
4101 */
4102DECL_HIDDEN_THROW(uint32_t)
4103iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
4104{
4105 if (pReNative->Core.bmGstRegShadowDirty & fFlushGstReg)
4106 {
4107 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
4108 uint32_t idxGstReg = 0;
4109
4110 do
4111 {
4112 if (bmGstRegShadowDirty & 0x1)
4113 {
4114 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
4115 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4116 }
4117 idxGstReg++;
4118 bmGstRegShadowDirty >>= 1;
4119 } while (bmGstRegShadowDirty);
4120 }
4121
4122 return off;
4123}
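
/*
 * Call sites typically flush everything just before the recompiled code leaves the TB or
 * calls a helper that inspects the guest context directly, e.g. (sketch, not an actual call site):
 *     off = iemNativeRegFlushDirtyGuest(pReNative, off);
 * Alternatively the flush can be restricted to a subset, say a single GPR:
 *     off = iemNativeRegFlushDirtyGuest(pReNative, off, RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xAX));
 */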
4124
4125
4126/**
4127 * Flush all shadowed guest registers marked as dirty for the given host register.
4128 *
4129 * @returns New code buffer offset.
4130 * @param pReNative The native recompile state.
4131 * @param off Current code buffer position.
4132 * @param idxHstReg The host register.
4133 *
4134 * @note This doesn't do any unshadowing of guest registers from the host register.
4135 */
4136DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
4137{
4138 /* We need to flush any pending guest register writes this host register shadows. */
4139 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4140 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
4141 {
4142 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
4143 uint32_t idxGstReg = 0;
4144 do
4145 {
4146 if (bmGstRegShadowDirty & 0x1)
4147 {
4148 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
4149 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4150 }
4151 idxGstReg++;
4152 bmGstRegShadowDirty >>= 1;
4153 } while (bmGstRegShadowDirty);
4154 }
4155
4156 return off;
4157}
4158#endif
4159
4160
4161/**
4162 * Locate a register, possibly freeing one up.
4163 *
4164 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4165 * failed.
4166 *
4167 * @returns Host register number on success. Returns UINT8_MAX if no register
4168 * was found, in which case the caller is supposed to deal with it and raise an
4169 * allocation type specific status code (if desired).
4170 *
4171 * @throws VBox status code if we run into trouble spilling a variable or
4172 * recording debug info. Does NOT throw anything if we're out of
4173 * registers, though.
4174 */
4175static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4176 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
4177{
4178 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4179 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4180 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4181
4182 /*
4183 * Try a freed register that's shadowing a guest register.
4184 */
4185 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4186 if (fRegs)
4187 {
4188 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4189
4190#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4191 /*
4192 * When we have liveness information, we use it to kick out all shadowed
4193 * guest registers that will not be needed any more in this TB. If we're
4194 * lucky, this may prevent us from ending up here again.
4195 *
4196 * Note! We must consider the previous entry here so we don't free
4197 * anything that the current threaded function requires (current
4198 * entry is produced by the next threaded function).
4199 */
4200 uint32_t const idxCurCall = pReNative->idxCurCall;
4201 if (idxCurCall > 0)
4202 {
4203 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4204
4205# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4206 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4207 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4208 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or XCPT_OR_CALL state */
4209#else
4210 /* Construct a mask of the registers not in the read or write state.
4211 Note! We could skip writes, if they aren't from us, as this is just
4212 a hack to prevent trashing registers that have just been written
4213 or will be written when we retire the current instruction. */
4214 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4215 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4216 & IEMLIVENESSBIT_MASK;
4217#endif
4218 /* Merge EFLAGS. */
4219 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
4220 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
4221 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
4222 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
4223 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
4224
4225 /* If it matches any shadowed registers. */
4226 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4227 {
4228#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4229 /* Writeback any dirty shadow registers we are about to unshadow. */
4230 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
4231#endif
4232
4233 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
4234 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4235 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4236
4237 /* See if we've got any unshadowed registers we can return now. */
4238 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4239 if (fUnshadowedRegs)
4240 {
4241 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4242 return (fPreferVolatile
4243 ? ASMBitFirstSetU32(fUnshadowedRegs)
4244 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4245 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4246 - 1;
4247 }
4248 }
4249 }
4250#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4251
4252 unsigned const idxReg = (fPreferVolatile
4253 ? ASMBitFirstSetU32(fRegs)
4254 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4255 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4256 - 1;
4257
4258 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4259 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4260 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4261 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4262
4263#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4264 /* We need to flush any pending guest register writes this host register shadows. */
4265 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
4266#endif
4267
4268 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4269 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4270 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4271 return idxReg;
4272 }
4273
4274 /*
4275 * Try to free up a variable that's in a register.
4276 *
4277 * We do two rounds here, first evacuating variables that don't need to be
4278 * saved on the stack, then in the second round moving things to the stack.
4279 */
4280 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4281 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4282 {
4283 uint32_t fVars = pReNative->Core.bmVars;
4284 while (fVars)
4285 {
4286 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4287 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4288#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4289 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
4290 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit first so we don't loop forever on the same variable. */
4291#endif
4292
4293 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4294 && (RT_BIT_32(idxReg) & fRegMask)
4295 && ( iLoop == 0
4296 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4297 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4298 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4299 {
4300 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4301 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4302 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4303 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4304 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4305 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4306#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4307 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4308#endif
4309
4310 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4311 {
4312 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4313 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4314 }
4315
4316 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4317 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4318
4319 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4320 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4321 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4322 return idxReg;
4323 }
4324 fVars &= ~RT_BIT_32(idxVar);
4325 }
4326 }
4327
4328 return UINT8_MAX;
4329}
4330
4331
4332/**
4333 * Reassigns a variable to a different register specified by the caller.
4334 *
4335 * @returns The new code buffer position.
4336 * @param pReNative The native recompile state.
4337 * @param off The current code buffer position.
4338 * @param idxVar The variable index.
4339 * @param idxRegOld The old host register number.
4340 * @param idxRegNew The new host register number.
4341 * @param pszCaller The caller for logging.
4342 */
4343static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4344 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4345{
4346 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4347 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4348#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4349 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4350#endif
4351 RT_NOREF(pszCaller);
4352
4353 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4354
4355 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4356#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4357 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4358#endif
4359 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4360 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4361 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4362
4363 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4364 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4365 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4366 if (fGstRegShadows)
4367 {
4368 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4369 | RT_BIT_32(idxRegNew);
4370 while (fGstRegShadows)
4371 {
4372 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4373 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4374
4375 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4376 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4377 }
4378 }
4379
4380 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4381 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4382 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4383 return off;
4384}
4385
4386
4387/**
4388 * Moves a variable to a different register or spills it onto the stack.
4389 *
4390 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4391 * kinds can easily be recreated if needed later.
4392 *
4393 * @returns The new code buffer position.
4394 * @param pReNative The native recompile state.
4395 * @param off The current code buffer position.
4396 * @param idxVar The variable index.
4397 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4398 * call-volatile registers.
4399 */
4400DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4401 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4402{
4403 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4404 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4405 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4406 Assert(!pVar->fRegAcquired);
4407
4408 uint8_t const idxRegOld = pVar->idxReg;
4409 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4410 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4411 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4412 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4413 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4414 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4415 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4416 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4417#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4418 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4419#endif
4420
4421
4422 /** @todo Add statistics on this.*/
4423 /** @todo Implement basic variable liveness analysis (python) so variables
4424 * can be freed immediately once no longer used. Without this we risk
4425 * trashing registers and stack for dead variables.
4426 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4427
4428 /*
4429 * First try move it to a different register, as that's cheaper.
4430 */
4431 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4432 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4433 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4434 if (fRegs)
4435 {
4436 /* Avoid using shadow registers, if possible. */
4437 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4438 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4439 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4440 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4441 }
4442
4443 /*
4444 * Otherwise we must spill the register onto the stack.
4445 */
4446 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4447 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4448 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4449 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4450
4451 pVar->idxReg = UINT8_MAX;
4452 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4453 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4454 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4455 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4456 return off;
4457}
4458
4459
4460/**
4461 * Allocates a temporary host general purpose register.
4462 *
4463 * This may emit code to save register content onto the stack in order to free
4464 * up a register.
4465 *
4466 * @returns The host register number; throws VBox status code on failure,
4467 * so no need to check the return value.
4468 * @param pReNative The native recompile state.
4469 * @param poff Pointer to the variable with the code buffer position.
4470 * This will be updated if we need to move a variable from
4471 * register to stack in order to satisfy the request.
4472 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4473 * registers (@c true, default) or the other way around
4474 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4475 */
4476DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4477{
4478 /*
4479 * Try find a completely unused register, preferably a call-volatile one.
4480 */
4481 uint8_t idxReg;
4482 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4483 & ~pReNative->Core.bmHstRegsWithGstShadow
4484 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4485 if (fRegs)
4486 {
4487 if (fPreferVolatile)
4488 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4489 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4490 else
4491 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4492 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4493 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4494 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4495 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4496 }
4497 else
4498 {
4499 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4500 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4501 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4502 }
4503 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4504}
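
/*
 * Illustrative usage sketch (not part of the recompiler; pReNative and off are
 * assumed to be the caller's recompile state and code buffer position):
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, 0);
 *      // ... emit code that uses idxTmpReg as scratch ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */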
4505
4506
4507/**
4508 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4509 * registers.
4510 *
4511 * @returns The host register number; throws VBox status code on failure,
4512 * so no need to check the return value.
4513 * @param pReNative The native recompile state.
4514 * @param poff Pointer to the variable with the code buffer position.
4515 *                          This will be updated if we need to move a variable from
4516 * register to stack in order to satisfy the request.
4517 * @param fRegMask Mask of acceptable registers.
4518 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4519 * registers (@c true, default) or the other way around
4520 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4521 */
4522DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4523 bool fPreferVolatile /*= true*/)
4524{
4525 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4526 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4527
4528 /*
4529 * Try find a completely unused register, preferably a call-volatile one.
4530 */
4531 uint8_t idxReg;
4532 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4533 & ~pReNative->Core.bmHstRegsWithGstShadow
4534 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4535 & fRegMask;
4536 if (fRegs)
4537 {
4538 if (fPreferVolatile)
4539 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4540 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4541 else
4542 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4543 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4544 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4545 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4546 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4547 }
4548 else
4549 {
4550 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4551 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4552 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4553 }
4554 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4555}
4556
4557
4558/**
4559 * Allocates a temporary register for loading an immediate value into.
4560 *
4561 * This will emit code to load the immediate, unless there happens to be an
4562 * unused register with the value already loaded.
4563 *
4564 * The caller must not modify the returned register; it must be considered
4565 * read-only. Free using iemNativeRegFreeTmpImm.
4566 *
4567 * @returns The host register number; throws VBox status code on failure, so no
4568 * need to check the return value.
4569 * @param pReNative The native recompile state.
4570 * @param poff Pointer to the variable with the code buffer position.
4571 * @param uImm The immediate value that the register must hold upon
4572 * return.
4573 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4574 * registers (@c true, default) or the other way around
4575 * (@c false).
4576 *
4577 * @note Reusing immediate values has not been implemented yet.
4578 */
4579DECL_HIDDEN_THROW(uint8_t)
4580iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4581{
4582 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4583 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4584 return idxReg;
4585}
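
/*
 * Illustrative usage sketch (not part of the recompiler): the register holding
 * the immediate must be treated as read-only and is freed with
 * iemNativeRegFreeTmpImm().
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT32_C(0xffff));
 *      // ... emit comparisons/masking against idxRegImm without modifying it ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */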
4586
4587
4588/**
4589 * Allocates a temporary host general purpose register for keeping a guest
4590 * register value.
4591 *
4592 * Since we may already have a register holding the guest register value,
4593 * code will be emitted to do the loading if that's not the case. Code may also
4594 * be emitted if we have to free up a register to satisfy the request.
4595 *
4596 * @returns The host register number; throws VBox status code on failure, so no
4597 * need to check the return value.
4598 * @param pReNative The native recompile state.
4599 * @param poff Pointer to the variable with the code buffer
4600 *                      position. This will be updated if we need to move a
4601 * variable from register to stack in order to satisfy
4602 * the request.
4603 * @param   enmGstReg       The guest register that is to be updated.
4604 * @param enmIntendedUse How the caller will be using the host register.
4605 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4606 * register is okay (default). The ASSUMPTION here is
4607 * that the caller has already flushed all volatile
4608 * registers, so this is only applied if we allocate a
4609 * new register.
4610 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4611 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4612 */
4613DECL_HIDDEN_THROW(uint8_t)
4614iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4615 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4616 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4617{
4618 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4619#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4620 AssertMsg( fSkipLivenessAssert
4621 || pReNative->idxCurCall == 0
4622 || enmGstReg == kIemNativeGstReg_Pc
4623 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4624 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4625 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4626 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4627 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4628 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4629#endif
4630 RT_NOREF(fSkipLivenessAssert);
4631#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4632 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4633#endif
4634 uint32_t const fRegMask = !fNoVolatileRegs
4635 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4636 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4637
4638#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4639 /** @todo r=aeichner Implement for registers other than GPR as well. */
4640 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4641 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
4642 && enmGstReg >= kIemNativeGstReg_GprFirst
4643 && enmGstReg <= kIemNativeGstReg_GprLast
4644 )
4645 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
4646#endif
4647
4648 /*
4649 * First check if the guest register value is already in a host register.
4650 */
4651 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4652 {
4653 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4654 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4655 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4656 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4657
4658 /* It's not supposed to be allocated... */
4659 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4660 {
4661 /*
4662 * If the register will trash the guest shadow copy, try find a
4663 * completely unused register we can use instead. If that fails,
4664 * we need to disassociate the host reg from the guest reg.
4665 */
4666 /** @todo would be nice to know if preserving the register is in any way helpful. */
4667 /* If the purpose is calculations, try duplicate the register value as
4668 we'll be clobbering the shadow. */
4669 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4670 && ( ~pReNative->Core.bmHstRegs
4671 & ~pReNative->Core.bmHstRegsWithGstShadow
4672 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4673 {
4674 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4675
4676 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4677
4678 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4679 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4680 g_apszIemNativeHstRegNames[idxRegNew]));
4681 idxReg = idxRegNew;
4682 }
4683 /* If the current register matches the restrictions, go ahead and allocate
4684 it for the caller. */
4685 else if (fRegMask & RT_BIT_32(idxReg))
4686 {
4687 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4688 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4689 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4690 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4691 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4692 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4693 else
4694 {
4695 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4696 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4697 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4698 }
4699 }
4700 /* Otherwise, allocate a register that satisfies the caller and transfer
4701 the shadowing if compatible with the intended use. (This basically
4702 means the call wants a non-volatile register (RSP push/pop scenario).) */
4703 else
4704 {
4705 Assert(fNoVolatileRegs);
4706 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4707 !fNoVolatileRegs
4708 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4709 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4710 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4711 {
4712 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4713                    Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4714 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4715 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4716 }
4717 else
4718 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4719 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4720 g_apszIemNativeHstRegNames[idxRegNew]));
4721 idxReg = idxRegNew;
4722 }
4723 }
4724 else
4725 {
4726 /*
4727 * Oops. Shadowed guest register already allocated!
4728 *
4729 * Allocate a new register, copy the value and, if updating, the
4730 * guest shadow copy assignment to the new register.
4731 */
4732 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4733 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4734 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4735 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4736
4737 /** @todo share register for readonly access. */
4738 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4739 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4740
4741 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4742 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4743
4744 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4745 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4746 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4747 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4748 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4749 else
4750 {
4751 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4752 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4753 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4754 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4755 }
4756 idxReg = idxRegNew;
4757 }
4758 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4759
4760#ifdef VBOX_STRICT
4761 /* Strict builds: Check that the value is correct. */
4762 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4763#endif
4764
4765 return idxReg;
4766 }
4767
4768 /*
4769     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4770 */
4771 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4772
4773 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4774 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4775
4776 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4777 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4778 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4779 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4780
4781 return idxRegNew;
4782}
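
/*
 * Illustrative usage sketch (not part of the recompiler; enmGstReg is an
 * assumed caller variable naming the guest register): a read-modify-write,
 * where kIemNativeGstRegUse_ForUpdate keeps the host register shadowing the
 * guest register after the modification.
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, enmGstReg,
 *                                                             kIemNativeGstRegUse_ForUpdate);
 *      // ... emit the modification of idxReg ...
 *      iemNativeRegFreeTmp(pReNative, idxReg);
 */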
4783
4784
4785/**
4786 * Allocates a temporary host general purpose register that already holds the
4787 * given guest register value.
4788 *
4789 * The use case for this function is places where the shadowing state cannot be
4790 * The use case for this function is code paths where the shadowing state cannot be
4791 * modified due to branching and such. This will fail if we don't have a
4792 * be emitted here is value checking code in strict builds.
4793 *
4794 * The intended use can only be readonly!
4795 *
4796 * @returns The host register number, UINT8_MAX if not present.
4797 * @param pReNative The native recompile state.
4798 * @param poff Pointer to the instruction buffer offset.
4799 * Will be updated in strict builds if a register is
4800 * found.
4801 * @param   enmGstReg       The guest register that is to be updated.
4802 * @note In strict builds, this may throw instruction buffer growth failures.
4803 * Non-strict builds will not throw anything.
4804 * @sa iemNativeRegAllocTmpForGuestReg
4805 */
4806DECL_HIDDEN_THROW(uint8_t)
4807iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4808{
4809 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4810#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4811 AssertMsg( pReNative->idxCurCall == 0
4812 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4813 || enmGstReg == kIemNativeGstReg_Pc,
4814 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4815#endif
4816
4817 /*
4818 * First check if the guest register value is already in a host register.
4819 */
4820 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4821 {
4822 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4823 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4824 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4825 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4826
4827 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4828 {
4829 /*
4830 * We only do readonly use here, so easy compared to the other
4831 * variant of this code.
4832 */
4833 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4834 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4835 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4836 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4837 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4838
4839#ifdef VBOX_STRICT
4840 /* Strict builds: Check that the value is correct. */
4841 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4842#else
4843 RT_NOREF(poff);
4844#endif
4845 return idxReg;
4846 }
4847 }
4848
4849 return UINT8_MAX;
4850}
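
/*
 * Illustrative usage sketch (not part of the recompiler): unlike
 * iemNativeRegAllocTmpForGuestReg(), this variant can return UINT8_MAX, so the
 * caller must provide a fallback for when no shadow copy is available.
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, enmGstReg);
 *      if (idxReg != UINT8_MAX)
 *      {
 *          // ... read-only use of idxReg ...
 *          iemNativeRegFreeTmp(pReNative, idxReg);
 *      }
 *      // else: fall back to reading the value from CPUMCTX.
 */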
4851
4852
4853/**
4854 * Allocates argument registers for a function call.
4855 *
4856 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4857 * need to check the return value.
4858 * @param pReNative The native recompile state.
4859 * @param off The current code buffer offset.
4860 * @param cArgs The number of arguments the function call takes.
4861 */
4862DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4863{
4864 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4865 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4866 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4867 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4868
4869 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4870 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4871 else if (cArgs == 0)
4872 return true;
4873
4874 /*
4875     * Do we get lucky and all registers are free and not shadowing anything?
4876 */
4877 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4878 for (uint32_t i = 0; i < cArgs; i++)
4879 {
4880 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4881 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4882 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4883 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4884 }
4885 /*
4886 * Okay, not lucky so we have to free up the registers.
4887 */
4888 else
4889 for (uint32_t i = 0; i < cArgs; i++)
4890 {
4891 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4892 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4893 {
4894 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4895 {
4896 case kIemNativeWhat_Var:
4897 {
4898 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4899 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4900 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4901 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4902 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4903#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4904 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4905#endif
4906
4907 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4908 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4909 else
4910 {
4911 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4912 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4913 }
4914 break;
4915 }
4916
4917 case kIemNativeWhat_Tmp:
4918 case kIemNativeWhat_Arg:
4919 case kIemNativeWhat_rc:
4920 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4921 default:
4922 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4923 }
4924
4925 }
4926 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4927 {
4928 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4929 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4930 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4931#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4932 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4933#endif
4934 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4935 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4936 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4937 }
4938 else
4939 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4940 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4941 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4942 }
4943 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4944 return true;
4945}
4946
4947
4948DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4949
4950
4951#if 0
4952/**
4953 * Frees a register assignment of any type.
4954 *
4955 * @param pReNative The native recompile state.
4956 * @param idxHstReg The register to free.
4957 *
4958 * @note Does not update variables.
4959 */
4960DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4961{
4962 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4963 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4964 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4965 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4966 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4967 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4968 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4969 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4970 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4971 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4972 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4973 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4974 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4975 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4976
4977 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4978 /* no flushing, right:
4979 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4980 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4981 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4982 */
4983}
4984#endif
4985
4986
4987/**
4988 * Frees a temporary register.
4989 *
4990 * Any shadow copies of guest registers assigned to the host register will not
4991 * be flushed by this operation.
4992 */
4993DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4994{
4995 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4996 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4997 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4998 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4999 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5000}
5001
5002
5003/**
5004 * Frees a temporary immediate register.
5005 *
5006 * It is assumed that the caller has not modified the register, so it still holds
5007 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
5008 */
5009DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
5010{
5011 iemNativeRegFreeTmp(pReNative, idxHstReg);
5012}
5013
5014
5015/**
5016 * Frees a register assigned to a variable.
5017 *
5018 * The register will be disassociated from the variable.
5019 */
5020DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
5021{
5022 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
5023 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
5024 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
5025 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5026 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
5027#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5028 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5029#endif
5030
5031 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
5032 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5033 if (!fFlushShadows)
5034 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
5035 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
5036 else
5037 {
5038 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5039 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5040#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5041 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
5042#endif
5043 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5044 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
5045 uint64_t fGstRegShadows = fGstRegShadowsOld;
5046 while (fGstRegShadows)
5047 {
5048 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5049 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5050
5051 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
5052 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
5053 }
5054 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
5055 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
5056 }
5057}
5058
5059
5060#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5061# ifdef LOG_ENABLED
5062/** Host CPU SIMD register names. */
5063DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5064{
5065# ifdef RT_ARCH_AMD64
5066 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5067# elif RT_ARCH_ARM64
5068 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5069 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5070# else
5071# error "port me"
5072# endif
5073};
5074# endif
5075
5076
5077/**
5078 * Frees a SIMD register assigned to a variable.
5079 *
5080 * The register will be disassociated from the variable.
5081 */
5082DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
5083{
5084 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
5085 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
5086 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
5087 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5088 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
5089 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5090
5091 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
5092 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
5093 if (!fFlushShadows)
5094 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
5095 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
5096 else
5097 {
5098 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5099 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
5100 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5101 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
5102 uint64_t fGstRegShadows = fGstRegShadowsOld;
5103 while (fGstRegShadows)
5104 {
5105 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5106 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5107
5108 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
5109 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
5110 }
5111 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
5112 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
5113 }
5114}
5115#endif
5116
5117
5118/**
5119 * Called right before emitting a call instruction to move anything important
5120 * out of call-volatile registers, free and flush the call-volatile registers,
5121 * optionally freeing argument variables.
5122 *
5123 * @returns New code buffer offset, UINT32_MAX on failure.
5124 * @param pReNative The native recompile state.
5125 * @param off The code buffer offset.
5126 * @param cArgs The number of arguments the function call takes.
5127 *                          It is presumed that the host register part of these has
5128 * been allocated as such already and won't need moving,
5129 * just freeing.
5130 * @param fKeepVars Mask of variables that should keep their register
5131 * assignments. Caller must take care to handle these.
5132 */
5133DECL_HIDDEN_THROW(uint32_t)
5134iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
5135{
5136 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5137
5138 /* fKeepVars will reduce this mask. */
5139 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5140
5141 /*
5142 * Move anything important out of volatile registers.
5143 */
5144 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
5145 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
5146 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
5147#ifdef IEMNATIVE_REG_FIXED_TMP0
5148 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
5149#endif
5150#ifdef IEMNATIVE_REG_FIXED_TMP1
5151 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
5152#endif
5153#ifdef IEMNATIVE_REG_FIXED_PC_DBG
5154 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
5155#endif
5156 & ~g_afIemNativeCallRegs[cArgs];
5157
5158 fRegsToMove &= pReNative->Core.bmHstRegs;
5159 if (!fRegsToMove)
5160 { /* likely */ }
5161 else
5162 {
5163 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
5164 while (fRegsToMove != 0)
5165 {
5166 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
5167 fRegsToMove &= ~RT_BIT_32(idxReg);
5168
5169 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
5170 {
5171 case kIemNativeWhat_Var:
5172 {
5173 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
5174 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5175 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5176 Assert(pVar->idxReg == idxReg);
5177 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
5178 {
5179 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
5180 idxVar, pVar->enmKind, pVar->idxReg));
5181 if (pVar->enmKind != kIemNativeVarKind_Stack)
5182 pVar->idxReg = UINT8_MAX;
5183 else
5184 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5185 }
5186 else
5187 fRegsToFree &= ~RT_BIT_32(idxReg);
5188 continue;
5189 }
5190
5191 case kIemNativeWhat_Arg:
5192 AssertMsgFailed(("What?!?: %u\n", idxReg));
5193 continue;
5194
5195 case kIemNativeWhat_rc:
5196 case kIemNativeWhat_Tmp:
5197 AssertMsgFailed(("Missing free: %u\n", idxReg));
5198 continue;
5199
5200 case kIemNativeWhat_FixedTmp:
5201 case kIemNativeWhat_pVCpuFixed:
5202 case kIemNativeWhat_pCtxFixed:
5203 case kIemNativeWhat_PcShadow:
5204 case kIemNativeWhat_FixedReserved:
5205 case kIemNativeWhat_Invalid:
5206 case kIemNativeWhat_End:
5207 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5208 }
5209 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5210 }
5211 }
5212
5213 /*
5214 * Do the actual freeing.
5215 */
5216 if (pReNative->Core.bmHstRegs & fRegsToFree)
5217 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
5218 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
5219 pReNative->Core.bmHstRegs &= ~fRegsToFree;
5220
5221 /* If there are guest register shadows in any call-volatile register, we
5222       have to clear the corresponding guest register masks for each register. */
5223 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
5224 if (fHstRegsWithGstShadow)
5225 {
5226 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5227 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
5228 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
5229 do
5230 {
5231 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
5232 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5233
5234 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
5235#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5236 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
5237#endif
5238 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
5239 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
5240 } while (fHstRegsWithGstShadow != 0);
5241 }
5242
5243 return off;
5244}
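
/*
 * Illustrative call-site sketch (not part of the recompiler): volatile
 * registers are vacated before the call, and the guest shadows of whatever the
 * callee may have changed are dropped afterwards (fGstRegsModified is an
 * assumed caller-provided mask).
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 3);
 *      // ... load the argument registers and emit the call itself ...
 *      iemNativeRegFlushGuestShadows(pReNative, fGstRegsModified);
 */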
5245
5246
5247/**
5248 * Flushes a set of guest register shadow copies.
5249 *
5250 * This is usually done after calling a threaded function or a C-implementation
5251 * of an instruction.
5252 *
5253 * @param pReNative The native recompile state.
5254 * @param fGstRegs Set of guest registers to flush.
5255 */
5256DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
5257{
5258 /*
5259 * Reduce the mask by what's currently shadowed
5260 */
5261 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
5262 fGstRegs &= bmGstRegShadowsOld;
5263 if (fGstRegs)
5264 {
5265 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
5266 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
5267 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
5268 if (bmGstRegShadowsNew)
5269 {
5270 /*
5271 * Partial.
5272 */
5273 do
5274 {
5275 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5276 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5277 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5278 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5279 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5280#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5281 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
5282#endif
5283
5284 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5285 fGstRegs &= ~fInThisHstReg;
5286 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5287 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5288 if (!fGstRegShadowsNew)
5289 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5290 } while (fGstRegs != 0);
5291 }
5292 else
5293 {
5294 /*
5295 * Clear all.
5296 */
5297 do
5298 {
5299 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5300 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5301 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5302 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5303 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5304#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5305 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
5306#endif
5307
5308 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5309 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5310 } while (fGstRegs != 0);
5311 pReNative->Core.bmHstRegsWithGstShadow = 0;
5312 }
5313 }
5314}
5315
5316
5317/**
5318 * Flushes guest register shadow copies held by a set of host registers.
5319 *
5320 * This is used with the TLB lookup code for ensuring that we don't carry on
5321 * with any guest shadows in volatile registers, as these will get corrupted by
5322 * a TLB miss.
5323 *
5324 * @param pReNative The native recompile state.
5325 * @param fHstRegs Set of host registers to flush guest shadows for.
5326 */
5327DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5328{
5329 /*
5330 * Reduce the mask by what's currently shadowed.
5331 */
5332 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5333 fHstRegs &= bmHstRegsWithGstShadowOld;
5334 if (fHstRegs)
5335 {
5336 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5337 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5338 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5339 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5340 if (bmHstRegsWithGstShadowNew)
5341 {
5342 /*
5343 * Partial (likely).
5344 */
5345 uint64_t fGstShadows = 0;
5346 do
5347 {
5348 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5349 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5350 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5351 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5352#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5353 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5354#endif
5355
5356 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5357 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5358 fHstRegs &= ~RT_BIT_32(idxHstReg);
5359 } while (fHstRegs != 0);
5360 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5361 }
5362 else
5363 {
5364 /*
5365 * Clear all.
5366 */
5367 do
5368 {
5369 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5370 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5371 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5372 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5373#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5374 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5375#endif
5376
5377 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5378 fHstRegs &= ~RT_BIT_32(idxHstReg);
5379 } while (fHstRegs != 0);
5380 pReNative->Core.bmGstRegShadows = 0;
5381 }
5382 }
5383}
5384
5385
5386/**
5387 * Restores guest shadow copies in volatile registers.
5388 *
5389 * This is used after calling a helper function (think TLB miss) to restore the
5390 * register state of volatile registers.
5391 *
5392 * @param pReNative The native recompile state.
5393 * @param off The code buffer offset.
5394 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5395 * be active (allocated) w/o asserting. Hack.
5396 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5397 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5398 */
5399DECL_HIDDEN_THROW(uint32_t)
5400iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5401{
5402 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5403 if (fHstRegs)
5404 {
5405 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5406 do
5407 {
5408 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5409
5410            /* It's not fatal if a register is active holding a variable that is
5411               shadowing a guest register, ASSUMING all pending guest register
5412               writes were flushed prior to the helper call. However, we'll be
5413               emitting duplicate restores, so it wastes code space. */
5414 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5415 RT_NOREF(fHstRegsActiveShadows);
5416
5417 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5418#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5419 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
5420#endif
5421 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5422 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5423 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5424
5425 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5426 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5427
5428 fHstRegs &= ~RT_BIT_32(idxHstReg);
5429 } while (fHstRegs != 0);
5430 }
5431 return off;
5432}
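
/*
 * Illustrative usage sketch (not part of the recompiler): on a slow code path
 * that called a helper (e.g. a TLB miss), the guest shadows which the shared
 * bookkeeping still associates with call-volatile registers are reloaded
 * before rejoining the fast path.
 *
 *      // ... helper call emitted, volatile host registers now hold garbage ...
 *      off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0); // assuming no active shadow variables
 */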
5433
5434
5435
5436
5437/*********************************************************************************************************************************
5438* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5439*********************************************************************************************************************************/
5440#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5441
5442/**
5443 * Info about shadowed guest SIMD register values.
5444 * @see IEMNATIVEGSTSIMDREG
5445 */
5446static struct
5447{
5448 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5449 uint32_t offXmm;
5450 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5451 uint32_t offYmm;
5452 /** Name (for logging). */
5453 const char *pszName;
5454} const g_aGstSimdShadowInfo[] =
5455{
5456#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5457 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5458 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5459 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5460 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5461 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5462 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5463 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5464 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5465 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5466 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5467 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5468 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5469 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5470 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5471 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5472 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5473 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5474#undef CPUMCTX_OFF_AND_SIZE
5475};
5476AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5477
5478
5479/**
5480 * Frees a temporary SIMD register.
5481 *
5482 * Any shadow copies of guest registers assigned to the host register will not
5483 * be flushed by this operation.
5484 */
5485DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5486{
5487 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5488 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5489 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5490 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5491 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5492}
5493
5494
5495/**
5496 * Emits code to flush a pending write of the given SIMD register if any, and also flushes the guest to host SIMD register association.
5497 *
5498 * @returns New code buffer offset.
5499 * @param pReNative The native recompile state.
5500 * @param off Current code buffer position.
5501 * @param enmGstSimdReg The guest SIMD register to flush.
5502 */
5503DECL_HIDDEN_THROW(uint32_t)
5504iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5505{
5506 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5507
5508 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5509 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5510 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5511 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5512
5513 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5514 {
5515 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5516 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5517 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5518 }
5519
5520 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5521 {
5522 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5523 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5524 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5525 }
5526
5527 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5528 return off;
5529}
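
/*
 * Illustrative usage sketch (not part of the recompiler; iYReg is an assumed
 * caller variable with the guest YMM register index): any dirty shadow must be
 * written back before code that reads the register from CPUMCTX is reached.
 *
 *      off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(iYReg));
 */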
5530
5531
5532/**
5533 * Locate a register, possibly freeing one up.
5534 *
5535 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5536 * failed.
5537 *
5538 * @returns Host register number on success. Returns UINT8_MAX if no registers
5539 * found, the caller is supposed to deal with this and raise a
5540 * @returns Host register number on success.  Returns UINT8_MAX if no register
5541 *          was found; the caller is supposed to deal with this and raise an
5542 *          allocation type specific status code (if desired).
5543 *
5544 * @throws  VBox status code if we run into trouble spilling a variable or
5545 *          recording debug info.  Does NOT throw anything if we're out of
5546static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5547 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5548{
5549 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5550 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5551 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5552
5553 /*
5554 * Try a freed register that's shadowing a guest register.
5555 */
5556 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5557 if (fRegs)
5558 {
5559 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5560
5561#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5562 /*
5563         * When we have liveness information, we use it to kick out all shadowed
5564         * guest registers that will not be needed any more in this TB. If we're
5565 * lucky, this may prevent us from ending up here again.
5566 *
5567 * Note! We must consider the previous entry here so we don't free
5568 * anything that the current threaded function requires (current
5569 * entry is produced by the next threaded function).
5570 */
5571 uint32_t const idxCurCall = pReNative->idxCurCall;
5572 if (idxCurCall > 0)
5573 {
5574 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5575
5576# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5577 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5578 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5579 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5580# else
5581 /* Construct a mask of the registers not in the read or write state.
5582               Note! We could skip writes, if they aren't from us, as this is just
5583 a hack to prevent trashing registers that have just been written
5584 or will be written when we retire the current instruction. */
5585 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5586 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5587 & IEMLIVENESSBIT_MASK;
5588# endif
5589 /* If it matches any shadowed registers. */
5590 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5591 {
5592 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5593 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5594 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5595
5596 /* See if we've got any unshadowed registers we can return now. */
5597 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5598 if (fUnshadowedRegs)
5599 {
5600 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5601 return (fPreferVolatile
5602 ? ASMBitFirstSetU32(fUnshadowedRegs)
5603 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5604 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5605 - 1;
5606 }
5607 }
5608 }
5609#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5610
5611 unsigned const idxReg = (fPreferVolatile
5612 ? ASMBitFirstSetU32(fRegs)
5613 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5614 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5615 - 1;
5616
5617 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5618 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5619 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5620 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5621
5622 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5623 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5624 uint32_t idxGstSimdReg = 0;
5625 do
5626 {
5627 if (fGstRegShadows & 0x1)
5628 {
5629 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5630 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5631 }
5632 idxGstSimdReg++;
5633 fGstRegShadows >>= 1;
5634 } while (fGstRegShadows);
5635
5636 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5637 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5638 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5639 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5640 return idxReg;
5641 }
5642
5643 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5644
5645 /*
5646 * Try free up a variable that's in a register.
5647 *
5648 * We do two rounds here, first evacuating variables we don't need to be
5649     * saved on the stack, then in the second round moving things to the stack.
5650 */
5651 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5652 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5653 {
5654 uint32_t fVars = pReNative->Core.bmVars;
5655 while (fVars)
5656 {
5657 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5658 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5659            if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here, but clear the bit so we don't loop forever. */
5660            {   fVars &= ~RT_BIT_32(idxVar);  continue; }
5661
5662 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5663 && (RT_BIT_32(idxReg) & fRegMask)
5664 && ( iLoop == 0
5665 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5666 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5667 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5668 {
5669 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5670 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5671 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5672 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5673 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5674 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5675
5676 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5677 {
5678 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5679 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5680 }
5681
5682 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5683 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5684
5685 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5686                pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5687 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5688 return idxReg;
5689 }
5690 fVars &= ~RT_BIT_32(idxVar);
5691 }
5692 }
5693
5694 AssertFailed();
5695 return UINT8_MAX;
5696}
5697
5698
5699/**
5700 * Flushes a set of guest register shadow copies.
5701 *
5702 * This is usually done after calling a threaded function or a C-implementation
5703 * of an instruction.
5704 *
5705 * @param pReNative The native recompile state.
5706 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5707 */
5708DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5709{
5710 /*
5711 * Reduce the mask by what's currently shadowed
5712 */
5713 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5714 fGstSimdRegs &= bmGstSimdRegShadows;
5715 if (fGstSimdRegs)
5716 {
5717 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5718 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5719 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5720 if (bmGstSimdRegShadowsNew)
5721 {
5722 /*
5723 * Partial.
5724 */
5725 do
5726 {
5727 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5728 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5729 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5730 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5731 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5732 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5733
5734 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5735 fGstSimdRegs &= ~fInThisHstReg;
5736 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5737 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5738 if (!fGstRegShadowsNew)
5739 {
5740 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5741 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5742 }
5743 } while (fGstSimdRegs != 0);
5744 }
5745 else
5746 {
5747 /*
5748 * Clear all.
5749 */
5750 do
5751 {
5752 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5753 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5754 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5755 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5756 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5757 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5758
5759 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5760 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5761 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5762 } while (fGstSimdRegs != 0);
5763 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5764 }
5765 }
5766}
5767
5768
5769/**
5770 * Allocates a temporary host SIMD register.
5771 *
5772 * This may emit code to save register content onto the stack in order to free
5773 * up a register.
5774 *
5775 * @returns The host register number; throws VBox status code on failure,
5776 * so no need to check the return value.
5777 * @param pReNative The native recompile state.
5778 * @param poff Pointer to the variable with the code buffer position.
5779 * This will be updated if we need to move a variable from
5780 * register to stack in order to satisfy the request.
5781 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5782 * registers (@c true, default) or the other way around
5783 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
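 *
 * @par      Example
 *           Illustrative sketch only; assumes @a pReNative and the code buffer
 *           offset @c off are in scope, and that the register is released again
 *           via the matching free routine once the emitted code no longer needs it:
 * @code
 *           uint8_t const idxTmpSimdReg = iemNativeSimdRegAllocTmp(pReNative, &off, true); // fPreferVolatile=true
 *           // ... emit SIMD instructions using idxTmpSimdReg ...
 * @endcode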
5784 */
5785DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5786{
5787 /*
5788 * Try to find a completely unused register, preferably a call-volatile one.
5789 */
5790 uint8_t idxSimdReg;
5791 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5792 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5793 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5794 if (fRegs)
5795 {
5796 if (fPreferVolatile)
5797 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5798 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5799 else
5800 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5801 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5802 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5803 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5804
5805 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5806 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5807 }
5808 else
5809 {
5810 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5811 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5812 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5813 }
5814
5815 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5816 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5817}
5818
5819
5820/**
5821 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5822 * registers.
5823 *
5824 * @returns The host register number; throws VBox status code on failure,
5825 * so no need to check the return value.
5826 * @param pReNative The native recompile state.
5827 * @param poff Pointer to the variable with the code buffer position.
5828 * This will be updated if we need to move a variable from
5829 * register to stack in order to satisfy the request.
5830 * @param fRegMask Mask of acceptable registers.
5831 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5832 * registers (@c true, default) or the other way around
5833 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
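 *
 * @par      Example
 *           Illustrative sketch restricting the allocation to non-volatile host
 *           SIMD registers, using the same mask construction as
 *           iemNativeSimdRegAllocTmpForGuestSimdReg():
 * @code
 *           uint32_t const fNonVolatileMask = IEMNATIVE_HST_SIMD_REG_MASK
 *                                           & ~IEMNATIVE_SIMD_REG_FIXED_MASK
 *                                           & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
 *           uint8_t const  idxTmpSimdReg    = iemNativeSimdRegAllocTmpEx(pReNative, &off, fNonVolatileMask,
 *                                                                        false); // fPreferVolatile=false
 * @endcode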
5834 */
5835DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5836 bool fPreferVolatile /*= true*/)
5837{
5838 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5839 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5840
5841 /*
5842 * Try to find a completely unused register, preferably a call-volatile one.
5843 */
5844 uint8_t idxSimdReg;
5845 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5846 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5847 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5848 & fRegMask;
5849 if (fRegs)
5850 {
5851 if (fPreferVolatile)
5852 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5853 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5854 else
5855 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5856 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5857 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5858 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5859
5860 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5861 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5862 }
5863 else
5864 {
5865 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5866 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5867 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5868 }
5869
5870 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5871 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5872}
5873
5874
5875/**
5876 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5877 *
5878 * @param pReNative The native recompile state.
5879 * @param idxHstSimdReg The host SIMD register to update the state for.
5880 * @param enmLoadSz The load size to set.
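 *
 * @par      Example
 *           Loading the low and the high 128-bit halves separately leaves the
 *           tracking state at "everything valid":
 * @code
 *           iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
 *           iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
 *           // enmLoaded is now kIemNativeGstSimdRegLdStSz_256.
 * @endcode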
5881 */
5882DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5883 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5884{
5885 /* Everything valid already? -> nothing to do. */
5886 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5887 return;
5888
5889 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5890 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5891 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5892 {
5893 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5894 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5895 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5896 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5897 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5898 }
5899}
5900
5901
5902static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5903 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5904{
5905 /* Easy case first: either the destination loads the same range the source has already loaded, or the source has loaded everything. */
5906 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5907 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5908 {
5909# ifdef RT_ARCH_ARM64
5910 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5911 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5912# endif
5913
5914 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5915 {
5916 switch (enmLoadSzDst)
5917 {
5918 case kIemNativeGstSimdRegLdStSz_256:
5919 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5920 break;
5921 case kIemNativeGstSimdRegLdStSz_Low128:
5922 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5923 break;
5924 case kIemNativeGstSimdRegLdStSz_High128:
5925 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5926 break;
5927 default:
5928 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5929 }
5930
5931 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5932 }
5933 }
5934 else
5935 {
5936 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5937 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5938 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5939 }
5940
5941 return off;
5942}
5943
5944
5945/**
5946 * Allocates a temporary host SIMD register for keeping a guest
5947 * SIMD register value.
5948 *
5949 * Since we may already have a register holding the guest register value,
5950 * code will be emitted to do the loading if that's not the case. Code may also
5951 * be emitted if we have to free up a register to satisfy the request.
5952 *
5953 * @returns The host register number; throws VBox status code on failure, so no
5954 * need to check the return value.
5955 * @param pReNative The native recompile state.
5956 * @param poff Pointer to the variable with the code buffer
5957 * position. This will be updated if we need to move a
5958 * variable from register to stack in order to satisfy
5959 * the request.
5960 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5961 * @param enmIntendedUse How the caller will be using the host register.
5962 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5963 * register is okay (default). The ASSUMPTION here is
5964 * that the caller has already flushed all volatile
5965 * registers, so this is only applied if we allocate a
5966 * new register.
5967 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
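 *
 * @par      Example
 *           Illustrative sketch (assumes @a pReNative and @c off are in scope):
 *           grab a host register shadowing the low 128 bits of guest SIMD
 *           register 0 for a read-modify-write style update:
 * @code
 *           uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                                 IEMNATIVEGSTSIMDREG_SIMD(0),
 *                                                                                 kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                                 kIemNativeGstRegUse_ForUpdate,
 *                                                                                 false); // fNoVolatileRegs=false
 * @endcode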
5968 */
5969DECL_HIDDEN_THROW(uint8_t)
5970iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5971 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5972 bool fNoVolatileRegs /*= false*/)
5973{
5974 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5975#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5976 AssertMsg( pReNative->idxCurCall == 0
5977 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5978 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5979 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5980 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5981 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5982 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5983#endif
5984#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5985 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5986#endif
5987 uint32_t const fRegMask = !fNoVolatileRegs
5988 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5989 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5990
5991 /*
5992 * First check if the guest register value is already in a host register.
5993 */
5994 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5995 {
5996 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5997 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5998 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5999 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
6000
6001 /* It's not supposed to be allocated... */
6002 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
6003 {
6004 /*
6005 * If the register will trash the guest shadow copy, try to find a
6006 * completely unused register we can use instead. If that fails,
6007 * we need to disassociate the host reg from the guest reg.
6008 */
6009 /** @todo would be nice to know if preserving the register is in any way helpful. */
6010 /* If the purpose is calculations, try to duplicate the register value as
6011 we'll be clobbering the shadow. */
6012 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
6013 && ( ~pReNative->Core.bmHstSimdRegs
6014 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6015 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
6016 {
6017 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
6018
6019 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6020
6021 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
6022 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6023 g_apszIemNativeHstSimdRegNames[idxRegNew]));
6024 idxSimdReg = idxRegNew;
6025 }
6026 /* If the current register matches the restrictions, go ahead and allocate
6027 it for the caller. */
6028 else if (fRegMask & RT_BIT_32(idxSimdReg))
6029 {
6030 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
6031 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
6032 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6033 {
6034 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6035 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
6036 else
6037 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
6038 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
6039 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6040 }
6041 else
6042 {
6043 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
6044 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
6045 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
6046 }
6047 }
6048 /* Otherwise, allocate a register that satisfies the caller and transfer
6049 the shadowing if compatible with the intended use. (This basically
6050 means the caller wants a non-volatile register (RSP push/pop scenario).) */
6051 else
6052 {
6053 Assert(fNoVolatileRegs);
6054 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
6055 !fNoVolatileRegs
6056 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
6057 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6058 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6059 {
6060 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
6061 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
6062 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
6063 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6064 }
6065 else
6066 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
6067 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6068 g_apszIemNativeHstSimdRegNames[idxRegNew]));
6069 idxSimdReg = idxRegNew;
6070 }
6071 }
6072 else
6073 {
6074 /*
6075 * Oops. Shadowed guest register already allocated!
6076 *
6077 * Allocate a new register, copy the value and, if updating, the
6078 * guest shadow copy assignment to the new register.
6079 */
6080 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
6081 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
6082 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
6083 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
6084
6085 /** @todo share register for readonly access. */
6086 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
6087 enmIntendedUse == kIemNativeGstRegUse_Calculation);
6088
6089 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6090 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6091 else
6092 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6093
6094 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
6095 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6096 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
6097 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6098 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
6099 else
6100 {
6101 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
6102 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
6103 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6104 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
6105 }
6106 idxSimdReg = idxRegNew;
6107 }
6108 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
6109
6110#ifdef VBOX_STRICT
6111 /* Strict builds: Check that the value is correct. */
6112 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6113 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
6114#endif
6115
6116 return idxSimdReg;
6117 }
6118
6119 /*
6120 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
6121 */
6122 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
6123
6124 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6125 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
6126 else
6127 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6128
6129 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6130 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
6131
6132 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
6133 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6134
6135 return idxRegNew;
6136}
6137
6138#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6139
6140
6141
6142/*********************************************************************************************************************************
6143* Code emitters for flushing pending guest register writes and sanity checks *
6144*********************************************************************************************************************************/
6145
6146#ifdef VBOX_STRICT
6147/**
6148 * Does internal register allocator sanity checks.
6149 */
6150DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6151{
6152 /*
6153 * Iterate host registers building a guest shadowing set.
6154 */
6155 uint64_t bmGstRegShadows = 0;
6156 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6157 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6158 while (bmHstRegsWithGstShadow)
6159 {
6160 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6161 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6162 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6163
6164 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6165 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6166 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6167 bmGstRegShadows |= fThisGstRegShadows;
6168 while (fThisGstRegShadows)
6169 {
6170 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6171 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6172 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6173 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6174 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6175 }
6176 }
6177 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6178 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6179 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6180
6181 /*
6182 * Now the other way around, checking the guest to host index array.
6183 */
6184 bmHstRegsWithGstShadow = 0;
6185 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6186 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6187 while (bmGstRegShadows)
6188 {
6189 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6190 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6191 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6192
6193 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6194 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6195 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6196 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6197 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6198 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6199 }
6200 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6201 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6202 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6203}
6204#endif /* VBOX_STRICT */
6205
6206
6207/**
6208 * Flushes any delayed guest register writes.
6209 *
6210 * This must be called prior to calling CImpl functions and any helpers that use
6211 * the guest state (like raising exceptions) and such.
6212 *
6213 * Depending on the build configuration this writes back the delayed PC update and any
6214 * dirty guest register shadows (GPR and SIMD), optionally dropping the shadow copies too.
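 *
 * @par      Example
 *           Callers normally go through the iemNativeRegFlushPendingWrites() wrapper;
 *           the direct form below is shown only to illustrate the parameters
 *           (flush everything except the still-delayed PC update):
 * @code
 *           off = iemNativeRegFlushPendingWritesSlow(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc),
 *                                                    false); // fFlushShadows=false
 * @endcode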
6215 */
6216DECL_HIDDEN_THROW(uint32_t)
6217iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
6218{
6219#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6220 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
6221 off = iemNativeEmitPcWriteback(pReNative, off);
6222#else
6223 RT_NOREF(pReNative, fGstShwExcept);
6224#endif
6225
6226#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6227 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
6228 if ( fFlushShadows
6229 && (pReNative->Core.bmGstRegShadows & ~fGstShwExcept))
6230 {
6231 uint64_t bmGstRegShadows = pReNative->Core.bmGstRegShadows & ~fGstShwExcept;
6232 uint8_t idxGstReg = 0;
6233 do
6234 {
6235 if (bmGstRegShadows & 0x1)
6236 {
6237 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6238
6239 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
6240 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(idxGstReg));
6241 }
6242 idxGstReg++;
6243 bmGstRegShadows >>= 1;
6244 } while (bmGstRegShadows);
6245 }
6246#endif
6247
6248#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6249 /** @todo r=bird: There must be a quicker way to check if anything needs
6250 * doing and then call simd function to do the flushing */
6251 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6252 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6253 {
6254 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6255 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6256
6257 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6258 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
6259
6260 if ( fFlushShadows
6261 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6262 {
6263 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6264
6265 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6266 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6267 }
6268 }
6269#else
6270 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6271#endif
6272
6273 return off;
6274}
6275
6276
6277#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6278/**
6279 * Emits code to update the guest RIP value by adding the instruction offset accumulated since the last RIP update.
6280 */
6281DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6282{
6283 Assert(pReNative->Core.offPc);
6284# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6285 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6286 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
6287# endif
6288
6289# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6290 /* Allocate a temporary PC register. */
6291 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6292
6293 /* Perform the addition and store the result. */
6294 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6295 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6296
6297 /* Free but don't flush the PC register. */
6298 iemNativeRegFreeTmp(pReNative, idxPcReg);
6299# else
6300 /* Compare the shadow with the context value, they should match. */
6301 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6302 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6303# endif
6304
6305 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
6306 pReNative->Core.offPc = 0;
6307 pReNative->Core.cInstrPcUpdateSkipped = 0;
6308
6309 return off;
6310}
6311#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6312
6313
6314/*********************************************************************************************************************************
6315* Code Emitters (larger snippets) *
6316*********************************************************************************************************************************/
6317
6318/**
6319 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6320 * extending to 64-bit width.
6321 *
6322 * @returns New code buffer offset on success, UINT32_MAX on failure.
6323 * @param pReNative The native recompile state.
6324 * @param off The current code buffer position.
6325 * @param idxHstReg The host register to load the guest register value into.
6326 * @param enmGstReg The guest register to load.
6327 *
6328 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6329 * that is something the caller needs to do if applicable.
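 *
 * @par      Example
 *           Loading the current guest RIP value into the fixed temporary register
 *           (the strict-build value check further below uses this helper the same
 *           way for arbitrary guest registers):
 * @code
 *           off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, kIemNativeGstReg_Pc);
 * @endcode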
6330 */
6331DECL_HIDDEN_THROW(uint32_t)
6332iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6333{
6334 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6335 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6336
6337 switch (g_aGstShadowInfo[enmGstReg].cb)
6338 {
6339 case sizeof(uint64_t):
6340 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6341 case sizeof(uint32_t):
6342 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6343 case sizeof(uint16_t):
6344 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6345#if 0 /* not present in the table. */
6346 case sizeof(uint8_t):
6347 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6348#endif
6349 default:
6350 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6351 }
6352}
6353
6354
6355#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6356/**
6357 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6358 *
6359 * @returns New code buffer offset on success, UINT32_MAX on failure.
6360 * @param pReNative The recompiler state.
6361 * @param off The current code buffer position.
6362 * @param idxHstSimdReg The host register to load the guest register value into.
6363 * @param enmGstSimdReg The guest register to load.
6364 * @param enmLoadSz The load size of the register.
6365 *
6366 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6367 * that is something the caller needs to do if applicable.
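 *
 * @par      Example
 *           Loading the low 128 bits of guest SIMD register 0 into the fixed
 *           temporary SIMD register, mirroring the strict-build value check below:
 * @code
 *           off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
 *                                                              IEMNATIVEGSTSIMDREG_SIMD(0),
 *                                                              kIemNativeGstSimdRegLdStSz_Low128);
 * @endcode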
6368 */
6369DECL_HIDDEN_THROW(uint32_t)
6370iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6371 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6372{
6373 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6374
6375 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6376 switch (enmLoadSz)
6377 {
6378 case kIemNativeGstSimdRegLdStSz_256:
6379 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6380 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6381 case kIemNativeGstSimdRegLdStSz_Low128:
6382 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6383 case kIemNativeGstSimdRegLdStSz_High128:
6384 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6385 default:
6386 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6387 }
6388}
6389#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6390
6391#ifdef VBOX_STRICT
6392
6393/**
6394 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
6395 *
6396 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6397 * Trashes EFLAGS on AMD64.
6398 */
6399DECL_HIDDEN_THROW(uint32_t)
6400iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6401{
6402# ifdef RT_ARCH_AMD64
6403 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6404
6405 /* rol reg64, 32 */
6406 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6407 pbCodeBuf[off++] = 0xc1;
6408 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6409 pbCodeBuf[off++] = 32;
6410
6411 /* test reg32, ffffffffh */
6412 if (idxReg >= 8)
6413 pbCodeBuf[off++] = X86_OP_REX_B;
6414 pbCodeBuf[off++] = 0xf7;
6415 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6416 pbCodeBuf[off++] = 0xff;
6417 pbCodeBuf[off++] = 0xff;
6418 pbCodeBuf[off++] = 0xff;
6419 pbCodeBuf[off++] = 0xff;
6420
6421 /* je/jz +1 */
6422 pbCodeBuf[off++] = 0x74;
6423 pbCodeBuf[off++] = 0x01;
6424
6425 /* int3 */
6426 pbCodeBuf[off++] = 0xcc;
6427
6428 /* rol reg64, 32 */
6429 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6430 pbCodeBuf[off++] = 0xc1;
6431 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6432 pbCodeBuf[off++] = 32;
6433
6434# elif defined(RT_ARCH_ARM64)
6435 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6436 /* lsr tmp0, reg64, #32 */
6437 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6438 /* cbz tmp0, +1 */
6439 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6440 /* brk #0x1100 */
6441 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6442
6443# else
6444# error "Port me!"
6445# endif
6446 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6447 return off;
6448}
6449
6450
6451/**
6452 * Emits code that checks that the content of register @a idxReg is the same
6453 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6454 * instruction if that's not the case.
6455 *
6456 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6457 * Trashes EFLAGS on AMD64.
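 *
 * @par      Example
 *           Illustrative sketch; @c idxPcReg is assumed to be the caller's host
 *           register currently shadowing the guest RIP:
 * @code
 *           off = iemNativeEmitGuestRegValueCheck(pReNative, off, idxPcReg, kIemNativeGstReg_Pc);
 * @endcode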
6458 */
6459DECL_HIDDEN_THROW(uint32_t)
6460iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6461{
6462#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6463 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6464 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6465 return off;
6466#endif
6467
6468# ifdef RT_ARCH_AMD64
6469 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6470
6471 /* cmp reg, [mem] */
6472 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6473 {
6474 if (idxReg >= 8)
6475 pbCodeBuf[off++] = X86_OP_REX_R;
6476 pbCodeBuf[off++] = 0x38;
6477 }
6478 else
6479 {
6480 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6481 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6482 else
6483 {
6484 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6485 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6486 else
6487 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6488 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6489 if (idxReg >= 8)
6490 pbCodeBuf[off++] = X86_OP_REX_R;
6491 }
6492 pbCodeBuf[off++] = 0x39;
6493 }
6494 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6495
6496 /* je/jz +1 */
6497 pbCodeBuf[off++] = 0x74;
6498 pbCodeBuf[off++] = 0x01;
6499
6500 /* int3 */
6501 pbCodeBuf[off++] = 0xcc;
6502
6503 /* For values smaller than the register size, we must check that the rest
6504 of the register is all zeros. */
6505 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6506 {
6507 /* test reg64, imm32 */
6508 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6509 pbCodeBuf[off++] = 0xf7;
6510 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6511 pbCodeBuf[off++] = 0;
6512 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6513 pbCodeBuf[off++] = 0xff;
6514 pbCodeBuf[off++] = 0xff;
6515
6516 /* je/jz +1 */
6517 pbCodeBuf[off++] = 0x74;
6518 pbCodeBuf[off++] = 0x01;
6519
6520 /* int3 */
6521 pbCodeBuf[off++] = 0xcc;
6522 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6523 }
6524 else
6525 {
6526 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6527 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6528 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6529 }
6530
6531# elif defined(RT_ARCH_ARM64)
6532 /* mov TMP0, [gstreg] */
6533 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6534
6535 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6536 /* sub tmp0, tmp0, idxReg */
6537 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6538 /* cbz tmp0, +1 */
6539 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6540 /* brk #0x1000+enmGstReg */
6541 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6542 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6543
6544# else
6545# error "Port me!"
6546# endif
6547 return off;
6548}
6549
6550
6551# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6552# ifdef RT_ARCH_AMD64
6553/**
6554 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6555 */
6556DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6557{
6558 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6559 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6560 if (idxSimdReg >= 8)
6561 pbCodeBuf[off++] = X86_OP_REX_R;
6562 pbCodeBuf[off++] = 0x0f;
6563 pbCodeBuf[off++] = 0x38;
6564 pbCodeBuf[off++] = 0x29;
6565 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6566
6567 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6568 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6569 pbCodeBuf[off++] = X86_OP_REX_W
6570 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6571 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6572 pbCodeBuf[off++] = 0x0f;
6573 pbCodeBuf[off++] = 0x3a;
6574 pbCodeBuf[off++] = 0x16;
6575 pbCodeBuf[off++] = 0xeb;
6576 pbCodeBuf[off++] = 0x00;
6577
6578 /* cmp tmp0, 0xffffffffffffffff. */
6579 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6580 pbCodeBuf[off++] = 0x83;
6581 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6582 pbCodeBuf[off++] = 0xff;
6583
6584 /* je/jz +1 */
6585 pbCodeBuf[off++] = 0x74;
6586 pbCodeBuf[off++] = 0x01;
6587
6588 /* int3 */
6589 pbCodeBuf[off++] = 0xcc;
6590
6591 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6592 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6593 pbCodeBuf[off++] = X86_OP_REX_W
6594 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6595 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6596 pbCodeBuf[off++] = 0x0f;
6597 pbCodeBuf[off++] = 0x3a;
6598 pbCodeBuf[off++] = 0x16;
6599 pbCodeBuf[off++] = 0xeb;
6600 pbCodeBuf[off++] = 0x01;
6601
6602 /* cmp tmp0, 0xffffffffffffffff. */
6603 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6604 pbCodeBuf[off++] = 0x83;
6605 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6606 pbCodeBuf[off++] = 0xff;
6607
6608 /* je/jz +1 */
6609 pbCodeBuf[off++] = 0x74;
6610 pbCodeBuf[off++] = 0x01;
6611
6612 /* int3 */
6613 pbCodeBuf[off++] = 0xcc;
6614
6615 return off;
6616}
6617# endif
6618
6619
6620/**
6621 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
6622 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6623 * instruction if that's not the case.
6624 *
6625 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6626 * Trashes EFLAGS on AMD64.
6627 */
6628DECL_HIDDEN_THROW(uint32_t)
6629iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6630 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6631{
6632 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6633 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6634 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6635 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6636 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6637 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6638 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6639 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6640 return off;
6641
6642# ifdef RT_ARCH_AMD64
6643 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6644 {
6645 /* movdqa vectmp0, idxSimdReg */
6646 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6647
6648 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6649
6650 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6651 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6652 }
6653
6654 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6655 {
6656 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6657 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6658
6659 /* vextracti128 vectmp0, idxSimdReg, 1 */
6660 pbCodeBuf[off++] = X86_OP_VEX3;
6661 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6662 | X86_OP_VEX3_BYTE1_X
6663 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6664 | 0x03; /* Opcode map */
6665 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6666 pbCodeBuf[off++] = 0x39;
6667 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6668 pbCodeBuf[off++] = 0x01;
6669
6670 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6671 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6672 }
6673# elif defined(RT_ARCH_ARM64)
6674 /* mov vectmp0, [gstreg] */
6675 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6676
6677 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6678 {
6679 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6680 /* eor vectmp0, vectmp0, idxSimdReg */
6681 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6682 /* uaddlv vectmp0, vectmp0.16B */
6683 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6684 /* umov tmp0, vectmp0.H[0] */
6685 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6686 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6687 /* cbz tmp0, +1 */
6688 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6689 /* brk #0x1000+enmGstReg */
6690 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6691 }
6692
6693 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6694 {
6695 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6696 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6697 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6698 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6699 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6700 /* umov tmp0, (vectmp0 + 1).H[0] */
6701 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6702 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6703 /* cbz tmp0, +1 */
6704 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6705 /* brk #0x1000+enmGstReg */
6706 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6707 }
6708
6709# else
6710# error "Port me!"
6711# endif
6712
6713 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6714 return off;
6715}
6716# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6717
6718
6719/**
6720 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6721 * important bits.
6722 *
6723 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6724 * Trashes EFLAGS on AMD64.
6725 */
6726DECL_HIDDEN_THROW(uint32_t)
6727iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6728{
6729 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6730 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6731 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6732 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6733
6734#ifdef RT_ARCH_AMD64
6735 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6736
6737 /* je/jz +1 */
6738 pbCodeBuf[off++] = 0x74;
6739 pbCodeBuf[off++] = 0x01;
6740
6741 /* int3 */
6742 pbCodeBuf[off++] = 0xcc;
6743
6744# elif defined(RT_ARCH_ARM64)
6745 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6746
6747 /* b.eq +1 */
6748 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6749 /* brk #0x2000 */
6750 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6751
6752# else
6753# error "Port me!"
6754# endif
6755 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6756
6757 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6758 return off;
6759}
6760
6761#endif /* VBOX_STRICT */
6762
6763
6764#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6765/**
6766 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6767 */
6768DECL_HIDDEN_THROW(uint32_t)
6769iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6770{
6771 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6772
6773 fEflNeeded &= X86_EFL_STATUS_BITS;
6774 if (fEflNeeded)
6775 {
6776# ifdef RT_ARCH_AMD64
6777 /* test dword [pVCpu + offVCpu], imm32 */
6778 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6779 if (fEflNeeded <= 0xff)
6780 {
6781 pCodeBuf[off++] = 0xf6;
6782 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6783 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6784 }
6785 else
6786 {
6787 pCodeBuf[off++] = 0xf7;
6788 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6789 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6790 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6791 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6792 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6793 }
6794 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6795
6796# else
6797 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6798 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6799 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6800# ifdef RT_ARCH_ARM64
6801 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6802 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6803# else
6804# error "Port me!"
6805# endif
6806 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6807# endif
6808 }
6809 return off;
6810}
6811#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6812
6813
6814/**
6815 * Emits a code for checking the return code of a call and rcPassUp, returning
6816 * from the code if either are non-zero.
6817 */
6818DECL_HIDDEN_THROW(uint32_t)
6819iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6820{
6821#ifdef RT_ARCH_AMD64
6822 /*
6823 * AMD64: eax = call status code.
6824 */
6825
6826 /* edx = rcPassUp */
6827 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6828# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6829 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6830# endif
6831
6832 /* edx = eax | rcPassUp */
6833 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6834 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6835 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6837
6838 /* Jump to non-zero status return path. */
6839 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6840
6841 /* done. */
6842
6843#elif RT_ARCH_ARM64
6844 /*
6845 * ARM64: w0 = call status code.
6846 */
6847# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6848 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6849# endif
6850 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6851
6852 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6853
6854 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6855
6856 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6857 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6858 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6859
6860#else
6861# error "port me"
6862#endif
6863 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6864 RT_NOREF_PV(idxInstr);
6865 return off;
6866}
6867
6868
6869/**
6870 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6871 * raising a \#GP(0) if it isn't.
6872 *
6873 * @returns New code buffer offset, UINT32_MAX on failure.
6874 * @param pReNative The native recompile state.
6875 * @param off The code buffer offset.
6876 * @param idxAddrReg The host register with the address to check.
6877 * @param idxInstr The current instruction.
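 *
 * @par      Example
 *           Illustrative sketch; @c idxAddrReg and @c idxInstr are caller-side
 *           values (the host register holding a 64-bit flat address and the
 *           current instruction number, respectively):
 * @code
 *           off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxAddrReg, idxInstr);
 * @endcode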
6878 */
6879DECL_HIDDEN_THROW(uint32_t)
6880iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6881{
6882 /*
6883 * Make sure we don't have any outstanding guest register writes as we may
6884 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
6885 */
6886 off = iemNativeRegFlushPendingWrites(pReNative, off);
6887
6888#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6889 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6890#else
6891 RT_NOREF(idxInstr);
6892#endif
6893
6894#ifdef RT_ARCH_AMD64
6895 /*
6896 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6897 * return raisexcpt();
6898 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6899 */
6900 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6901
6902 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6903 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6904 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6905 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6906 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6907
6908 iemNativeRegFreeTmp(pReNative, iTmpReg);
6909
6910#elif defined(RT_ARCH_ARM64)
6911 /*
6912 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6913 * return raisexcpt();
6914 * ----
6915 * mov x1, 0x800000000000
6916 * add x1, x0, x1
6917 * cmp xzr, x1, lsr 48
6918 * b.ne .Lraisexcpt
6919 */
6920 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6921
6922 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6923 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6924 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6925 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6926
6927 iemNativeRegFreeTmp(pReNative, iTmpReg);
6928
6929#else
6930# error "Port me"
6931#endif
6932 return off;
6933}
6934
6935
6936/**
6937 * Emits code to check that the content of @a idxAddrReg is within the limit
6938 * of CS, raising a \#GP(0) if it isn't.
6939 *
6940 * @returns New code buffer offset; throws VBox status code on error.
6941 * @param pReNative The native recompile state.
6942 * @param off The code buffer offset.
6943 * @param idxAddrReg The host register (32-bit) with the address to
6944 * check.
6945 * @param idxInstr The current instruction.
6946 */
6947DECL_HIDDEN_THROW(uint32_t)
6948iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6949 uint8_t idxAddrReg, uint8_t idxInstr)
6950{
6951 /*
6952 * Make sure we don't have any outstanding guest register writes as we may
6953 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6954 */
6955 off = iemNativeRegFlushPendingWrites(pReNative, off);
6956
6957#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6958 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6959#else
6960 RT_NOREF(idxInstr);
6961#endif
6962
6963 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6964 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6965 kIemNativeGstRegUse_ReadOnly);
6966
6967 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6968 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6969
6970 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6971 return off;
6972}
6973
6974
6975/**
6976 * Emits a call to a CImpl function or something similar.
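 *
 * @par      Example
 *           Illustrative sketch only; iemCImpl_SomeWorker, @c cbInstr and @c uParam0
 *           are placeholders rather than real API, the point is the shape of the call:
 * @code
 *           // fGstShwFlush=0, cAddParams=1:
 *           off = iemNativeEmitCImplCall(pReNative, off, idxInstr, 0,
 *                                        (uintptr_t)iemCImpl_SomeWorker, cbInstr,
 *                                        1, uParam0, 0, 0);
 * @endcode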
6977 */
6978DECL_HIDDEN_THROW(uint32_t)
6979iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6980 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6981{
6982 /* Writeback everything. */
6983 off = iemNativeRegFlushPendingWrites(pReNative, off);
6984
6985 /*
6986 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6987 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6988 */
6989 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6990 fGstShwFlush
6991 | RT_BIT_64(kIemNativeGstReg_Pc)
6992 | RT_BIT_64(kIemNativeGstReg_EFlags));
6993 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6994
6995 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6996
6997 /*
6998 * Load the parameters.
6999 */
7000#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
7001 /* Special handling for the hidden VBOXSTRICTRC return pointer. */
7002 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7003 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
7004 if (cAddParams > 0)
7005 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
7006 if (cAddParams > 1)
7007 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
7008 if (cAddParams > 2)
7009 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
7010 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7011
7012#else
7013 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7014 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7015 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
7016 if (cAddParams > 0)
7017 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
7018 if (cAddParams > 1)
7019 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
7020 if (cAddParams > 2)
7021# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
7022 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
7023# else
7024 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
7025# endif
7026#endif
7027
7028 /*
7029 * Make the call.
7030 */
7031 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
7032
7033#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
7034 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7035#endif
7036
7037 /*
7038 * Check the status code.
7039 */
7040 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7041}
7042
7043
7044/**
7045 * Emits a call to a threaded worker function.
7046 */
7047DECL_HIDDEN_THROW(uint32_t)
7048iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
7049{
7050 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7051
7052 /* We don't know what the threaded function is doing so we must flush all pending writes. */
7053 off = iemNativeRegFlushPendingWrites(pReNative, off);
7054
7055 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
7056 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
7057
7058#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7059 /* The threaded function may throw / long jmp, so set the current instruction
7060 number if we're counting. */
7061 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7062#endif
7063
7064 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
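    /* The code emitted below effectively performs (unused parameters are simply
       not loaded):
           rcStrict = g_apfnIemThreadedFunctions[enmFunction](pVCpu, auParams[0],
                                                              auParams[1], auParams[2]); */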
7065
7066#ifdef RT_ARCH_AMD64
7067 /* Load the parameters and emit the call. */
7068# ifdef RT_OS_WINDOWS
7069# ifndef VBOXSTRICTRC_STRICT_ENABLED
7070 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7071 if (cParams > 0)
7072 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
7073 if (cParams > 1)
7074 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
7075 if (cParams > 2)
7076 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
7077# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
7078 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
7079 if (cParams > 0)
7080 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
7081 if (cParams > 1)
7082 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
7083 if (cParams > 2)
7084 {
7085 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
7086 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
7087 }
7088 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7089# endif /* VBOXSTRICTRC_STRICT_ENABLED */
7090# else
7091 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7092 if (cParams > 0)
7093 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
7094 if (cParams > 1)
7095 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
7096 if (cParams > 2)
7097 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
7098# endif
7099
7100 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
7101
7102# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
7103 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7104# endif
7105
7106#elif RT_ARCH_ARM64
7107 /*
7108 * ARM64:
7109 */
7110 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7111 if (cParams > 0)
7112 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
7113 if (cParams > 1)
7114 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
7115 if (cParams > 2)
7116 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
7117
7118 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
7119
7120#else
7121# error "port me"
7122#endif
7123
7124 /*
7125 * Check the status code.
7126 */
7127 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
7128
7129 return off;
7130}
7131
7132#ifdef VBOX_WITH_STATISTICS
7133/**
7134 * Emits code to update the thread call statistics.
7135 */
7136DECL_INLINE_THROW(uint32_t)
7137iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
7138{
7139 /*
7140 * Update threaded function stats.
7141 */
7142 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
7143 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
7144# if defined(RT_ARCH_ARM64)
7145 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
7146 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
7147 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
7148 iemNativeRegFreeTmp(pReNative, idxTmp1);
7149 iemNativeRegFreeTmp(pReNative, idxTmp2);
7150# else
7151 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
7152# endif
7153 return off;
7154}
7155#endif /* VBOX_WITH_STATISTICS */
7156
7157
7158/**
7159 * Emits the code at the ReturnWithFlags label (returns
7160 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7161 */
7162static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7163{
7164 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7165 if (idxLabel != UINT32_MAX)
7166 {
7167 iemNativeLabelDefine(pReNative, idxLabel, off);
7168
7169 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7170
7171 /* jump back to the return sequence. */
7172 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7173 }
7174 return off;
7175}
7176
7177
7178/**
7179 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7180 */
7181static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7182{
7183 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7184 if (idxLabel != UINT32_MAX)
7185 {
7186 iemNativeLabelDefine(pReNative, idxLabel, off);
7187
7188 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7189
7190 /* jump back to the return sequence. */
7191 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7192 }
7193 return off;
7194}
7195
7196
7197/**
7198 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7199 */
7200static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7201{
7202 /*
7203 * Generate the rc + rcPassUp fiddling code if needed.
7204 */
7205 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7206 if (idxLabel != UINT32_MAX)
7207 {
7208 iemNativeLabelDefine(pReNative, idxLabel, off);
7209
7210 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
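        /* At this point the return register (eax / w0) holds the failing rc, and the
           instruction number (when available) is in cl on amd64 / the third argument
           register on arm64; the code below merely shuffles these into the helper's
           argument registers for the host ABI. */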
7211#ifdef RT_ARCH_AMD64
7212# ifdef RT_OS_WINDOWS
7213# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7214 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7215# endif
7216 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7217 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7218# else
7219 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7220 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7221# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7222 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7223# endif
7224# endif
7225# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7226 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7227# endif
7228
7229#else
7230 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7231 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7232 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7233#endif
7234
7235 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7236 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7237 }
7238 return off;
7239}
7240
7241
7242/**
7243 * Emits a standard epilog.
7244 */
7245static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7246{
7247 *pidxReturnLabel = UINT32_MAX;
7248
7249 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7250 off = iemNativeRegFlushPendingWrites(pReNative, off);
7251
7252 /*
7253 * Successful return, so clear the return register (eax, w0).
7254 */
7255 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
7256
7257 /*
7258 * Define label for common return point.
7259 */
7260 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7261 *pidxReturnLabel = idxReturn;
7262
7263 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7264
7265 /*
7266 * Restore registers and return.
7267 */
7268#ifdef RT_ARCH_AMD64
7269 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7270
7271 /* Reposition rsp at the r15 restore point. */
7272 pbCodeBuf[off++] = X86_OP_REX_W;
7273 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7274 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7275 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7276
7277 /* Pop non-volatile registers and return */
7278 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7279 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7280 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7281 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7282 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7283 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7284 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7285 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7286# ifdef RT_OS_WINDOWS
7287 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7288 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7289# endif
7290 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7291 pbCodeBuf[off++] = 0xc9; /* leave */
7292 pbCodeBuf[off++] = 0xc3; /* ret */
7293 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7294
7295#elif RT_ARCH_ARM64
7296 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7297
7298 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7299 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7300 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7301 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7302 IEMNATIVE_FRAME_VAR_SIZE / 8);
7303 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7304 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7305 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7306 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7307 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7308 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7309 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7310 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7311 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7312 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7313 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7314 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7315
7316 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7317 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7318 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7319 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7320
7321 /* retab / ret */
7322# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7323 if (1)
7324 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7325 else
7326# endif
7327 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7328
7329#else
7330# error "port me"
7331#endif
7332 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7333
7334 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7335}
7336
7337
7338/**
7339 * Emits a standard prolog.
7340 */
7341static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7342{
7343#ifdef RT_ARCH_AMD64
7344 /*
7345 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7346 * reserving 64 bytes for stack variables plus 4 non-register argument
7347 * slots. Fixed register assignment: xBX = pVCpu;
7348 *
7349 * Since we always do the same register spilling, we can use the same
7350 * unwind description for all the code.
7351 */
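    /* Rough sketch of the frame produced by the pushes below (Windows layout;
       the non-Windows one omits the rsi/rdi saves):
           rbp+08h: return address
           rbp+00h: saved rbp
           rbp-08h: saved rbx            (the fixed pVCpu register)
           rbp-10h: saved rsi            (Windows only)
           rbp-18h: saved rdi            (Windows only)
           rbp-20h..-38h: saved r12 thru r15
       followed by the alignment padding, variable area and stack/shadow argument
       slots reserved by the 'sub rsp' further down. */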
7352 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7353 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7354 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7355 pbCodeBuf[off++] = 0x8b;
7356 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7357 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7358 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7359# ifdef RT_OS_WINDOWS
7360 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7361 pbCodeBuf[off++] = 0x8b;
7362 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7363 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7364 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7365# else
7366 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7367 pbCodeBuf[off++] = 0x8b;
7368 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7369# endif
7370 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7371 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7372 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7373 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7374 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7375 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7376 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7377 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7378
7379# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7380 /* Save the frame pointer. */
7381 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7382# endif
7383
7384 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7385 X86_GREG_xSP,
7386 IEMNATIVE_FRAME_ALIGN_SIZE
7387 + IEMNATIVE_FRAME_VAR_SIZE
7388 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7389 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7390 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7391 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7392 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7393
7394#elif RT_ARCH_ARM64
7395 /*
7396 * We set up a stack frame exactly like on x86, only we have to push the
7397 * return address ourselves here. We save all non-volatile registers.
7398 */
7399 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7400
7401# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
7402 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7403 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
7404 * in any way conditional, so just emit this instruction now and hope for the best... */
7405 /* pacibsp */
7406 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7407# endif
7408
7409 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7410 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7411 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7412 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7413 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7414 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7415 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7416 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7417 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7418 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7419 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7420 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7421 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7422 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7423 /* Save the BP and LR (ret address) registers at the top of the frame. */
7424 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7425 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7426 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7427 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7428 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7429 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7430
7431 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7432 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7433
7434 /* mov r28, r0 */
7435 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7436 /* mov r27, r1 */
7437 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7438
7439# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7440 /* Save the frame pointer. */
7441 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7442 ARMV8_A64_REG_X2);
7443# endif
7444
7445#else
7446# error "port me"
7447#endif
7448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7449 return off;
7450}
7451
7452
7453/*********************************************************************************************************************************
7454* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7455*********************************************************************************************************************************/
7456
7457/**
7458 * Internal work that allocates a variable with kind set to
7459 * kIemNativeVarKind_Invalid and no current stack allocation.
7460 *
7461 * The kind will either be set by the caller or later when the variable is first
7462 * assigned a value.
7463 *
7464 * @returns Unpacked index.
7465 * @internal
7466 */
7467static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7468{
7469 Assert(cbType > 0 && cbType <= 64);
7470 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7471 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7472 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7473 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7474 pReNative->Core.aVars[idxVar].cbVar = cbType;
7475 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7476 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7477 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7478 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7479 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7480 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7481 pReNative->Core.aVars[idxVar].u.uValue = 0;
7482#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7483 pReNative->Core.aVars[idxVar].fSimdReg = false;
7484#endif
7485 return idxVar;
7486}
7487
7488
7489/**
7490 * Internal work that allocates an argument variable w/o setting enmKind.
7491 *
7492 * @returns Unpacked index.
7493 * @internal
7494 */
7495static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7496{
7497 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7498 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7499 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7500
7501 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7502 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7503 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7504 return idxVar;
7505}
7506
7507
7508/**
7509 * Gets the stack slot for a stack variable, allocating one if necessary.
7510 *
7511 * Calling this function implies that the stack slot will contain a valid
7512 * variable value. The caller deals with any register currently assigned to the
7513 * variable, typically by spilling it into the stack slot.
7514 *
7515 * @returns The stack slot number.
7516 * @param pReNative The recompiler state.
7517 * @param idxVar The variable.
7518 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7519 */
7520DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7521{
7522 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7523 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7524 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7525
7526 /* Already got a slot? */
7527 uint8_t const idxStackSlot = pVar->idxStackSlot;
7528 if (idxStackSlot != UINT8_MAX)
7529 {
7530 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7531 return idxStackSlot;
7532 }
7533
7534 /*
7535 * A single slot is easy to allocate.
7536 * Allocate them from the top end, closest to BP, to reduce the displacement.
7537 */
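    /* Note: ASMBitLastSetU32(~bmStack) - 1 picks the highest numbered free slot,
       i.e. the free slot closest to BP as described above. */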
7538 if (pVar->cbVar <= sizeof(uint64_t))
7539 {
7540 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7541 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7542 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7543 pVar->idxStackSlot = (uint8_t)iSlot;
7544 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7545 return (uint8_t)iSlot;
7546 }
7547
7548 /*
7549 * We need more than one stack slot.
7550 *
7551 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7552 */
7553 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7554 Assert(pVar->cbVar <= 64);
7555 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7556 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
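    /* Example: a 32 byte variable gives fBitAlignMask=3 and fBitAllocMask=0xf,
       i.e. four consecutive 8-byte slots starting at a multiple of four slots. */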
7557 uint32_t bmStack = pReNative->Core.bmStack;
7558 while (bmStack != UINT32_MAX)
7559 {
7560 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7561 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7562 iSlot = (iSlot - 1) & ~fBitAlignMask;
7563 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7564 {
7565 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7566 pVar->idxStackSlot = (uint8_t)iSlot;
7567 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7568 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7569 return (uint8_t)iSlot;
7570 }
7571
7572 bmStack |= (fBitAllocMask << iSlot);
7573 }
7574 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7575}
7576
7577
7578/**
7579 * Changes the variable to a stack variable.
7580 *
7581 * Currently this is only possible to do the first time the variable is used;
7582 * switching later could be implemented but hasn't been done.
7583 *
7584 * @param pReNative The recompiler state.
7585 * @param idxVar The variable.
7586 * @throws VERR_IEM_VAR_IPE_2
7587 */
7588DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7589{
7590 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7591 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7592 if (pVar->enmKind != kIemNativeVarKind_Stack)
7593 {
7594 /* We could in theory transition from immediate to stack as well, but it
7595 would involve the caller doing work storing the value on the stack. So,
7596 till that's required we only allow transition from invalid. */
7597 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7598 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7599 pVar->enmKind = kIemNativeVarKind_Stack;
7600
7601 /* Note! We don't allocate a stack slot here, that's only done when a
7602 slot is actually needed to hold a variable value. */
7603 }
7604}
7605
7606
7607/**
7608 * Sets it to a variable with a constant value.
7609 *
7610 * This does not require stack storage as we know the value and can always
7611 * reload it, unless of course it's referenced.
7612 *
7613 * @param pReNative The recompiler state.
7614 * @param idxVar The variable.
7615 * @param uValue The immediate value.
7616 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7617 */
7618DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7619{
7620 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7621 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7622 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7623 {
7624 /* Only simple transitions for now. */
7625 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7626 pVar->enmKind = kIemNativeVarKind_Immediate;
7627 }
7628 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7629
7630 pVar->u.uValue = uValue;
7631 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7632 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7633 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7634}
7635
7636
7637/**
7638 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7639 *
7640 * This does not require stack storage as we know the value and can always
7641 * reload it. Loading is postponed till needed.
7642 *
7643 * @param pReNative The recompiler state.
7644 * @param idxVar The variable. Unpacked.
7645 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7646 *
7647 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7648 * @internal
7649 */
7650static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7651{
7652 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7653 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7654
7655 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7656 {
7657 /* Only simple transitions for now. */
7658 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7659 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7660 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7661 }
7662 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7663
7664 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7665
7666 /* Update the other variable, ensure it's a stack variable. */
7667 /** @todo handle variables with const values... that'll go boom now. */
7668 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7669 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7670}
7671
7672
7673/**
7674 * Sets the variable to a reference (pointer) to a guest register reference.
7675 *
7676 * This does not require stack storage as we know the value and can always
7677 * reload it. Loading is postponed till needed.
7678 *
7679 * @param pReNative The recompiler state.
7680 * @param idxVar The variable.
7681 * @param enmRegClass The class guest registers to reference.
7682 * @param idxReg The register within @a enmRegClass to reference.
7683 *
7684 * @throws VERR_IEM_VAR_IPE_2
7685 */
7686DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7687 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7688{
7689 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7690 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7691
7692 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7693 {
7694 /* Only simple transitions for now. */
7695 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7696 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7697 }
7698 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7699
7700 pVar->u.GstRegRef.enmClass = enmRegClass;
7701 pVar->u.GstRegRef.idx = idxReg;
7702}
7703
7704
7705DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7706{
7707 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7708}
7709
7710
7711DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7712{
7713 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7714
7715 /* Since we're using a generic uint64_t value type, we must truncate it if
7716 the variable is smaller, otherwise we may end up with a too large value when
7717 scaling up an imm8 w/ sign-extension.
7718
7719 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7720 in the bios, bx=1) when running on arm, because clang expects 16-bit
7721 register parameters to have bits 16 and up set to zero. Instead of
7722 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7723 CF value in the result. */
7724 switch (cbType)
7725 {
7726 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7727 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7728 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7729 }
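    /* Example of the truncation above: cbType=2 with uValue=UINT64_MAX yields a
       0xffff constant, so the 16-bit argument is loaded with bits 16 thru 63 clear. */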
7730 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7731 return idxVar;
7732}
7733
7734
7735DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7736{
7737 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7738 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7739 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7740 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7741 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7742 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7743
7744 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7745 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7746 return idxArgVar;
7747}
7748
7749
7750DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7751{
7752 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7753 /* Don't set to stack now, leave that to the first use as for instance
7754 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7755 return idxVar;
7756}
7757
7758
7759DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7760{
7761 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7762
7763 /* Since we're using a generic uint64_t value type, we must truncate it if
7764 the variable is smaller, otherwise we may end up with a too large value when
7765 scaling up an imm8 w/ sign-extension. */
7766 switch (cbType)
7767 {
7768 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7769 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7770 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7771 }
7772 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7773 return idxVar;
7774}
7775
7776
7777/**
7778 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7779 * fixed till we call iemNativeVarRegisterRelease.
7780 *
7781 * @returns The host register number.
7782 * @param pReNative The recompiler state.
7783 * @param idxVar The variable.
7784 * @param poff Pointer to the instruction buffer offset.
7785 * In case a register needs to be freed up or the value
7786 * loaded off the stack.
7787 * @param fInitialized Set if the variable must already have been initialized.
7788 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7789 * the case.
7790 * @param idxRegPref Preferred register number or UINT8_MAX.
7791 */
7792DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7793 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7794{
7795 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7796 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7797 Assert(pVar->cbVar <= 8);
7798 Assert(!pVar->fRegAcquired);
7799
7800 uint8_t idxReg = pVar->idxReg;
7801 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7802 {
7803 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7804 && pVar->enmKind < kIemNativeVarKind_End);
7805 pVar->fRegAcquired = true;
7806 return idxReg;
7807 }
7808
7809 /*
7810 * If the kind of variable has not yet been set, default to 'stack'.
7811 */
7812 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7813 && pVar->enmKind < kIemNativeVarKind_End);
7814 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7815 iemNativeVarSetKindToStack(pReNative, idxVar);
7816
7817 /*
7818 * We have to allocate a register for the variable, even if it's a stack one,
7819 * as we don't know if there are modifications being made to it before it's
7820 * finalized (todo: analyze and insert hints about that?).
7821 *
7822 * If we can, we try to get the correct register for argument variables. This
7823 * assumes that most argument variables are fetched as close as possible
7824 * to the actual call, so that there aren't any interfering hidden calls
7825 * (memory accesses, etc.) in between.
7826 *
7827 * If we cannot, or it's a local variable, we make sure no argument registers
7828 * that will be used by this MC block are allocated here, and we always
7829 * prefer non-volatile registers to avoid needing to spill stuff for internal
7830 * calls.
7831 */
7832 /** @todo Detect too early argument value fetches and warn about hidden
7833 * calls causing less optimal code to be generated in the python script. */
7834
7835 uint8_t const uArgNo = pVar->uArgNo;
7836 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7837 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7838 {
7839 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7840
7841#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7842 /* Writeback any dirty shadow registers we are about to unshadow. */
7843 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7844#endif
7845
7846 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7847 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7848 }
7849 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7850 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7851 {
7852 /** @todo there must be a better way for this and boot cArgsX? */
7853 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7854 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7855 & ~pReNative->Core.bmHstRegsWithGstShadow
7856 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7857 & fNotArgsMask;
7858 if (fRegs)
7859 {
7860 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7861 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7862 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7863 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7864 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7865 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7866 }
7867 else
7868 {
7869 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7870 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7871 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7872 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7873 }
7874 }
7875 else
7876 {
7877 idxReg = idxRegPref;
7878 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7879 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7880 }
7881 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7882 pVar->idxReg = idxReg;
7883
7884#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7885 pVar->fSimdReg = false;
7886#endif
7887
7888 /*
7889 * Load it off the stack if we've got a stack slot.
7890 */
7891 uint8_t const idxStackSlot = pVar->idxStackSlot;
7892 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7893 {
7894 Assert(fInitialized);
7895 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7896 switch (pVar->cbVar)
7897 {
7898 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7899 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7900 case 3: AssertFailed(); RT_FALL_THRU();
7901 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7902 default: AssertFailed(); RT_FALL_THRU();
7903 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7904 }
7905 }
7906 else
7907 {
7908 Assert(idxStackSlot == UINT8_MAX);
7909 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7910 }
7911 pVar->fRegAcquired = true;
7912 return idxReg;
7913}
7914
7915
7916#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7917/**
7918 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7919 * fixed till we call iemNativeVarRegisterRelease.
7920 *
7921 * @returns The host register number.
7922 * @param pReNative The recompiler state.
7923 * @param idxVar The variable.
7924 * @param poff Pointer to the instruction buffer offset.
7925 * In case a register needs to be freed up or the value
7926 * loaded off the stack.
7927 * @param fInitialized Set if the variable must already have been initialized.
7928 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7929 * the case.
7930 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7931 */
7932DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7933 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7934{
7935 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7936 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7937 Assert( pVar->cbVar == sizeof(RTUINT128U)
7938 || pVar->cbVar == sizeof(RTUINT256U));
7939 Assert(!pVar->fRegAcquired);
7940
7941 uint8_t idxReg = pVar->idxReg;
7942 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7943 {
7944 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7945 && pVar->enmKind < kIemNativeVarKind_End);
7946 pVar->fRegAcquired = true;
7947 return idxReg;
7948 }
7949
7950 /*
7951 * If the kind of variable has not yet been set, default to 'stack'.
7952 */
7953 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7954 && pVar->enmKind < kIemNativeVarKind_End);
7955 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7956 iemNativeVarSetKindToStack(pReNative, idxVar);
7957
7958 /*
7959 * We have to allocate a register for the variable, even if it's a stack one,
7960 * as we don't know if there are modifications being made to it before it's
7961 * finalized (todo: analyze and insert hints about that?).
7962 *
7963 * If we can, we try to get the correct register for argument variables. This
7964 * assumes that most argument variables are fetched as close as possible
7965 * to the actual call, so that there aren't any interfering hidden calls
7966 * (memory accesses, etc.) in between.
7967 *
7968 * If we cannot, or it's a local variable, we make sure no argument registers
7969 * that will be used by this MC block are allocated here, and we always
7970 * prefer non-volatile registers to avoid needing to spill stuff for internal
7971 * calls.
7972 */
7973 /** @todo Detect too early argument value fetches and warn about hidden
7974 * calls causing less optimal code to be generated in the python script. */
7975
7976 uint8_t const uArgNo = pVar->uArgNo;
7977 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7978
7979 /* SIMD is a bit simpler for now because there is no support for arguments. */
7980 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7981 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7982 {
7983 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7984 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7985 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7986 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7987 & fNotArgsMask;
7988 if (fRegs)
7989 {
7990 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7991 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7992 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7993 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7994 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7995 }
7996 else
7997 {
7998 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7999 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
8000 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8001 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8002 }
8003 }
8004 else
8005 {
8006 idxReg = idxRegPref;
8007 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8008 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8009 }
8010 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8011
8012 pVar->fSimdReg = true;
8013 pVar->idxReg = idxReg;
8014
8015 /*
8016 * Load it off the stack if we've got a stack slot.
8017 */
8018 uint8_t const idxStackSlot = pVar->idxStackSlot;
8019 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8020 {
8021 Assert(fInitialized);
8022 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8023 switch (pVar->cbVar)
8024 {
8025 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
8026 default: AssertFailed(); RT_FALL_THRU();
8027 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
8028 }
8029 }
8030 else
8031 {
8032 Assert(idxStackSlot == UINT8_MAX);
8033 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8034 }
8035 pVar->fRegAcquired = true;
8036 return idxReg;
8037}
8038#endif
8039
8040
8041/**
8042 * The value of variable @a idxVar will be written in full to the @a enmGstReg
8043 * guest register.
8044 *
8045 * This function makes sure there is a register for it and sets it to be the
8046 * current shadow copy of @a enmGstReg.
8047 *
8048 * @returns The host register number.
8049 * @param pReNative The recompiler state.
8050 * @param idxVar The variable.
8051 * @param enmGstReg The guest register this variable will be written to
8052 * after this call.
8053 * @param poff Pointer to the instruction buffer offset.
8054 * In case a register needs to be freed up or if the
8055 * variable content needs to be loaded off the stack.
8056 *
8057 * @note We DO NOT expect @a idxVar to be an argument variable,
8058 * because this function is only used in the commit stage of an
8059 * instruction.
8060 */
8061DECL_HIDDEN_THROW(uint8_t)
8062iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
8063{
8064 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8065 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8066 Assert(!pVar->fRegAcquired);
8067 AssertMsgStmt( pVar->cbVar <= 8
8068 && ( pVar->enmKind == kIemNativeVarKind_Immediate
8069 || pVar->enmKind == kIemNativeVarKind_Stack),
8070 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
8071 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
8072 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8073
8074 /*
8075 * This shouldn't ever be used for arguments, unless it's in a weird else
8076 * branch that doesn't do any calling and even then it's questionable.
8077 *
8078 * However, in case someone writes crazy wrong MC code and does register
8079 * updates before making calls, just use the regular register allocator to
8080 * ensure we get a register suitable for the intended argument number.
8081 */
8082 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
8083
8084 /*
8085 * If there is already a register for the variable, we transfer/set the
8086 * guest shadow copy assignment to it.
8087 */
8088 uint8_t idxReg = pVar->idxReg;
8089 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8090 {
8091#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
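        /* Mark the guest GPR shadow as dirty so the delayed writeback will store the
           new value to CPUMCTX later (only GPRs are tracked at the moment). */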
8092 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
8093 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
8094#endif
8095
8096 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
8097 {
8098 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
8099 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
8100 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
8101 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
8102 }
8103 else
8104 {
8105 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
8106 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
8107 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
8108 }
8109 /** @todo figure this one out. We need some way of making sure the register isn't
8110 * modified after this point, just in case we start writing crappy MC code. */
8111 pVar->enmGstReg = enmGstReg;
8112 pVar->fRegAcquired = true;
8113 return idxReg;
8114 }
8115 Assert(pVar->uArgNo == UINT8_MAX);
8116
8117 /*
8118 * Because this is supposed to be the commit stage, we just tag along with the
8119 * temporary register allocator and upgrade it to a variable register.
8120 */
8121 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
8122 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
8123 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
8124 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
8125 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
8126 pVar->idxReg = idxReg;
8127
8128 /*
8129 * Now we need to load the register value.
8130 */
8131 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8132 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
8133 else
8134 {
8135 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8136 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8137 switch (pVar->cbVar)
8138 {
8139 case sizeof(uint64_t):
8140 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8141 break;
8142 case sizeof(uint32_t):
8143 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8144 break;
8145 case sizeof(uint16_t):
8146 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8147 break;
8148 case sizeof(uint8_t):
8149 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8150 break;
8151 default:
8152 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8153 }
8154 }
8155
8156 pVar->fRegAcquired = true;
8157 return idxReg;
8158}
8159
8160
8161/**
8162 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8163 *
8164 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8165 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8166 * requirement of flushing anything in volatile host registers when making a
8167 * call.
8168 *
8169 * @returns New @a off value.
8170 * @param pReNative The recompiler state.
8171 * @param off The code buffer position.
8172 * @param fHstRegsNotToSave Set of registers not to save & restore.
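 *
 * Typical usage sketch (assumed pairing; pfnSomeHelper is just a placeholder):
 * @code
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 * @endcode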
8173 */
8174DECL_HIDDEN_THROW(uint32_t)
8175iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8176{
8177 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8178 if (fHstRegs)
8179 {
8180 do
8181 {
8182 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8183 fHstRegs &= ~RT_BIT_32(idxHstReg);
8184
8185 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8186 {
8187 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8188 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8189 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8190 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8191 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8192 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8193 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8194 {
8195 case kIemNativeVarKind_Stack:
8196 {
8197 /* Temporarily spill the variable register. */
8198 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8199 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8200 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8201 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8202 continue;
8203 }
8204
8205 case kIemNativeVarKind_Immediate:
8206 case kIemNativeVarKind_VarRef:
8207 case kIemNativeVarKind_GstRegRef:
8208 /* It is weird to have any of these loaded at this point. */
8209 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8210 continue;
8211
8212 case kIemNativeVarKind_End:
8213 case kIemNativeVarKind_Invalid:
8214 break;
8215 }
8216 AssertFailed();
8217 }
8218 else
8219 {
8220 /*
8221 * Allocate a temporary stack slot and spill the register to it.
8222 */
8223 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8224 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8225 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8226 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8227 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8228 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8229 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8230 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8231 }
8232 } while (fHstRegs);
8233 }
8234#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8235 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8236 if (fHstRegs)
8237 {
8238 do
8239 {
8240 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8241 fHstRegs &= ~RT_BIT_32(idxHstReg);
8242
8243 /*
8244 * Guest registers are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
8245 * which would be more difficult due to spanning multiple stack slots and different sizes
8246 * (besides, we only have a limited amount of slots at the moment). Fixed temporary registers
8247 * don't need saving.
8248 */
8249 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8250 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8251 continue;
8252
8253 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8254
8255 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8256 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8257 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8258 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8259 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8260 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8261 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8262 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8263 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8264 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8265 {
8266 case kIemNativeVarKind_Stack:
8267 {
8268 /* Temporarily spill the variable register. */
8269 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8270 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8271 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8272 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8273 if (cbVar == sizeof(RTUINT128U))
8274 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8275 else
8276 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8277 continue;
8278 }
8279
8280 case kIemNativeVarKind_Immediate:
8281 case kIemNativeVarKind_VarRef:
8282 case kIemNativeVarKind_GstRegRef:
8283 /* It is weird to have any of these loaded at this point. */
8284 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8285 continue;
8286
8287 case kIemNativeVarKind_End:
8288 case kIemNativeVarKind_Invalid:
8289 break;
8290 }
8291 AssertFailed();
8292 } while (fHstRegs);
8293 }
8294#endif
8295 return off;
8296}
8297
8298
8299/**
8300 * Emit code to restore volatile registers after a call to a helper.
8301 *
8302 * @returns New @a off value.
8303 * @param pReNative The recompiler state.
8304 * @param off The code buffer position.
8305 * @param fHstRegsNotToSave Set of registers not to save & restore.
8306 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8307 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8308 */
8309DECL_HIDDEN_THROW(uint32_t)
8310iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8311{
8312 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8313 if (fHstRegs)
8314 {
8315 do
8316 {
8317 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8318 fHstRegs &= ~RT_BIT_32(idxHstReg);
8319
8320 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8321 {
8322 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8323 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8324 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8325 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8326 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8327 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8328 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8329 {
8330 case kIemNativeVarKind_Stack:
8331 {
8332 /* Unspill the variable register. */
8333 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8334 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8335 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8336 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8337 continue;
8338 }
8339
8340 case kIemNativeVarKind_Immediate:
8341 case kIemNativeVarKind_VarRef:
8342 case kIemNativeVarKind_GstRegRef:
8343 /* It is weird to have any of these loaded at this point. */
8344 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8345 continue;
8346
8347 case kIemNativeVarKind_End:
8348 case kIemNativeVarKind_Invalid:
8349 break;
8350 }
8351 AssertFailed();
8352 }
8353 else
8354 {
8355 /*
8356 * Restore from temporary stack slot.
8357 */
8358 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8359 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8360 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8361 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8362
8363 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8364 }
8365 } while (fHstRegs);
8366 }
8367#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8368 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8369 if (fHstRegs)
8370 {
8371 do
8372 {
8373 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8374 fHstRegs &= ~RT_BIT_32(idxHstReg);
8375
8376 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8377 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8378 continue;
8379 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8380
8381 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8382 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8383 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8384 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8385 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8386 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8387 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8388 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8389 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8390 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8391 {
8392 case kIemNativeVarKind_Stack:
8393 {
8394 /* Unspill the variable register. */
8395 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8396 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8397 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8398 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8399
8400 if (cbVar == sizeof(RTUINT128U))
8401 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8402 else
8403 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8404 continue;
8405 }
8406
8407 case kIemNativeVarKind_Immediate:
8408 case kIemNativeVarKind_VarRef:
8409 case kIemNativeVarKind_GstRegRef:
8410 /* It is weird to have any of these loaded at this point. */
8411 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8412 continue;
8413
8414 case kIemNativeVarKind_End:
8415 case kIemNativeVarKind_Invalid:
8416 break;
8417 }
8418 AssertFailed();
8419 } while (fHstRegs);
8420 }
8421#endif
8422 return off;
8423}
8424
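/*
 * Editorial sketch, not part of the build: the save/restore pair above is
 * meant to bracket a helper call.  A minimal usage outline, assuming an
 * iemNativeEmitCallImm()-style call emitter and IEMNATIVE_CALL_ARG1_GREG
 * (both named here for illustration only):
 *
 *     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *     off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, uArg);   // hypothetical argument
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);                  // hypothetical helper
 *     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */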
8425
8426/**
8427 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8428 *
8429 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8430 *
8431 * ASSUMES that @a idxVar is valid and unpacked.
8432 */
8433DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8434{
8435 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8436 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8437 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8438 {
8439 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8440 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8441 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8442 Assert(cSlots > 0);
8443 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8444 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8445 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8446 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8447 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8448 }
8449 else
8450 Assert(idxStackSlot == UINT8_MAX);
8451}
8452
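/*
 * Editorial note: worked example of the slot math above.  For a 32 byte
 * variable (RTUINT256U) sitting at idxStackSlot == 6:
 *
 *     cSlots     = (32 + sizeof(uint64_t) - 1) / sizeof(uint64_t) = 4
 *     fAllocMask = RT_BIT_32(4) - 1                               = 0x0000000f
 *
 * so the '&= ~(fAllocMask << idxStackSlot)' clears bits 6 thru 9 of bmStack.
 */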
8453
8454/**
8455 * Worker that frees a single variable.
8456 *
8457 * ASSUMES that @a idxVar is valid and unpacked.
8458 */
8459DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8460{
8461 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8462 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8463 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8464
8465 /* Free the host register first if any assigned. */
8466 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8467#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8468 if ( idxHstReg != UINT8_MAX
8469 && pReNative->Core.aVars[idxVar].fSimdReg)
8470 {
8471 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8472 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8473 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8474 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8475 }
8476 else
8477#endif
8478 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8479 {
8480 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8481 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8482 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8483 }
8484
8485 /* Free argument mapping. */
8486 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8487 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8488 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8489
8490 /* Free the stack slots. */
8491 iemNativeVarFreeStackSlots(pReNative, idxVar);
8492
8493 /* Free the actual variable. */
8494 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8495 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8496}
8497
8498
8499/**
8500 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8501 */
8502DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8503{
8504 while (bmVars != 0)
8505 {
8506 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8507 bmVars &= ~RT_BIT_32(idxVar);
8508
8509#if 1 /** @todo optimize by simplifying this later... */
8510 iemNativeVarFreeOneWorker(pReNative, idxVar);
8511#else
8512 /* Only need to free the host register, the rest is done as bulk updates below. */
8513 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8514 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8515 {
8516 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8517 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8518 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8519 }
8520#endif
8521 }
8522#if 0 /** @todo optimize by simplifying this later... */
8523 pReNative->Core.bmVars = 0;
8524 pReNative->Core.bmStack = 0;
8525 pReNative->Core.u64ArgVars = UINT64_MAX;
8526#endif
8527}
8528
8529
8530
8531/*********************************************************************************************************************************
8532* Emitters for IEM_MC_CALL_CIMPL_XXX *
8533*********************************************************************************************************************************/
8534
8535/**
8536 * Emits code to load a reference to the given guest register into @a idxGprDst.
8537 */
8538DECL_HIDDEN_THROW(uint32_t)
8539iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8540 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8541{
8542#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8543 /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
8544#endif
8545
8546 /*
8547 * Get the offset relative to the CPUMCTX structure.
8548 */
8549 uint32_t offCpumCtx;
8550 switch (enmClass)
8551 {
8552 case kIemNativeGstRegRef_Gpr:
8553 Assert(idxRegInClass < 16);
8554 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8555 break;
8556
8557 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8558 Assert(idxRegInClass < 4);
8559 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8560 break;
8561
8562 case kIemNativeGstRegRef_EFlags:
8563 Assert(idxRegInClass == 0);
8564 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8565 break;
8566
8567 case kIemNativeGstRegRef_MxCsr:
8568 Assert(idxRegInClass == 0);
8569 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8570 break;
8571
8572 case kIemNativeGstRegRef_FpuReg:
8573 Assert(idxRegInClass < 8);
8574 AssertFailed(); /** @todo what kind of indexing? */
8575 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8576 break;
8577
8578 case kIemNativeGstRegRef_MReg:
8579 Assert(idxRegInClass < 8);
8580 AssertFailed(); /** @todo what kind of indexing? */
8581 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8582 break;
8583
8584 case kIemNativeGstRegRef_XReg:
8585 Assert(idxRegInClass < 16);
8586 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8587 break;
8588
8589 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8590 Assert(idxRegInClass == 0);
8591 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8592 break;
8593
8594 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8595 Assert(idxRegInClass == 0);
8596 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8597 break;
8598
8599 default:
8600 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8601 }
8602
8603 /*
8604 * Load the value into the destination register.
8605 */
8606#ifdef RT_ARCH_AMD64
8607 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8608
8609#elif defined(RT_ARCH_ARM64)
8610 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8611 Assert(offCpumCtx < 4096);
8612 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8613
8614#else
8615# error "Port me!"
8616#endif
8617
8618 return off;
8619}
8620
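/*
 * Editorial sketch, not part of the build: on both targets the emitted
 * instruction leaves idxGprDst holding the host equivalent of
 *
 *     idxGprDst = (uintptr_t)&pVCpu->cpum.GstCtx + offCpumCtx;
 *
 * e.g. for kIemNativeGstRegRef_Gpr with idxRegInClass == 3 that is
 * &pVCpu->cpum.GstCtx.aGRegs[3], i.e. a pointer to the guest RBX value.
 */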
8621
8622/**
8623 * Common code for CIMPL and AIMPL calls.
8624 *
8625 * These are calls that use argument variables and such. They should not be
8626 * confused with internal calls required to implement an MC operation,
8627 * like a TLB load and similar.
8628 *
8629 * Upon return all that is left to do is to load any hidden arguments and
8630 * perform the call. All argument variables are freed.
8631 *
8632 * @returns New code buffer offset; throws VBox status code on error.
8633 * @param pReNative The native recompile state.
8634 * @param off The code buffer offset.
8635 * @param cArgs The total number of arguments (includes hidden
8636 * count).
8637 * @param cHiddenArgs The number of hidden arguments. The hidden
8638 * arguments must not have any variable declared for
8639 * them, whereas all the regular arguments must
8640 * (tstIEMCheckMc ensures this).
8641 */
8642DECL_HIDDEN_THROW(uint32_t)
8643iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8644{
8645#ifdef VBOX_STRICT
8646 /*
8647 * Assert sanity.
8648 */
8649 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8650 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8651 for (unsigned i = 0; i < cHiddenArgs; i++)
8652 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8653 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8654 {
8655 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8656 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8657 }
8658 iemNativeRegAssertSanity(pReNative);
8659#endif
8660
8661 /* We don't know what the called function makes use of, so flush any pending register writes. */
8662 off = iemNativeRegFlushPendingWrites(pReNative, off);
8663
8664 /*
8665 * Before we do anything else, go over variables that are referenced and
8666 * make sure they are not in a register.
8667 */
8668 uint32_t bmVars = pReNative->Core.bmVars;
8669 if (bmVars)
8670 {
8671 do
8672 {
8673 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8674 bmVars &= ~RT_BIT_32(idxVar);
8675
8676 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8677 {
8678 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8679#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8680 if ( idxRegOld != UINT8_MAX
8681 && pReNative->Core.aVars[idxVar].fSimdReg)
8682 {
8683 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8684 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8685
8686 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8687 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8688 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8689 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8690 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8691 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8692 else
8693 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8694
8695 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8696 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8697
8698 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8699 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8700 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8701 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8702 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8703 }
8704 else
8705#endif
8706 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8707 {
8708 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8709 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8710 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8711 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8712 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8713
8714 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8715 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8716 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8717 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8718 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8719 }
8720 }
8721 } while (bmVars != 0);
8722#if 0 //def VBOX_STRICT
8723 iemNativeRegAssertSanity(pReNative);
8724#endif
8725 }
8726
8727 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8728
8729 /*
8730 * First, go over the host registers that will be used for arguments and make
8731 * sure they either hold the desired argument or are free.
8732 */
8733 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8734 {
8735 for (uint32_t i = 0; i < cRegArgs; i++)
8736 {
8737 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8738 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8739 {
8740 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8741 {
8742 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8743 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8744 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8745 Assert(pVar->idxReg == idxArgReg);
8746 uint8_t const uArgNo = pVar->uArgNo;
8747 if (uArgNo == i)
8748 { /* perfect */ }
8749 /* The variable allocator logic should make sure this is impossible,
8750 except for when the return register is used as a parameter (ARM,
8751 but not x86). */
8752#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8753 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8754 {
8755# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8756# error "Implement this"
8757# endif
8758 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8759 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8760 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8761 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8762 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8763 }
8764#endif
8765 else
8766 {
8767 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8768
8769 if (pVar->enmKind == kIemNativeVarKind_Stack)
8770 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8771 else
8772 {
8773 /* just free it, can be reloaded if used again */
8774 pVar->idxReg = UINT8_MAX;
8775 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8776 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8777 }
8778 }
8779 }
8780 else
8781 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8782 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8783 }
8784 }
8785#if 0 //def VBOX_STRICT
8786 iemNativeRegAssertSanity(pReNative);
8787#endif
8788 }
8789
8790 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8791
8792#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8793 /*
8794 * If there are any stack arguments, make sure they are in their place as well.
8795 *
8796 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8797 * the caller) will be loading it later and it must be free (see the first loop).
8798 */
8799 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8800 {
8801 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8802 {
8803 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8804 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8805 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8806 {
8807 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8808 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8809 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8810 pVar->idxReg = UINT8_MAX;
8811 }
8812 else
8813 {
8814 /* Use ARG0 as temp for stuff we need registers for. */
8815 switch (pVar->enmKind)
8816 {
8817 case kIemNativeVarKind_Stack:
8818 {
8819 uint8_t const idxStackSlot = pVar->idxStackSlot;
8820 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8821 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8822 iemNativeStackCalcBpDisp(idxStackSlot));
8823 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8824 continue;
8825 }
8826
8827 case kIemNativeVarKind_Immediate:
8828 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8829 continue;
8830
8831 case kIemNativeVarKind_VarRef:
8832 {
8833 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8834 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8835 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8836 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8837 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8838# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8839 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8840 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8841 if ( fSimdReg
8842 && idxRegOther != UINT8_MAX)
8843 {
8844 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8845 if (cbVar == sizeof(RTUINT128U))
8846 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8847 else
8848 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8849 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8850 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8851 }
8852 else
8853# endif
8854 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8855 {
8856 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8857 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8858 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8859 }
8860 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8861 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8862 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8863 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8864 continue;
8865 }
8866
8867 case kIemNativeVarKind_GstRegRef:
8868 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8869 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8870 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8871 continue;
8872
8873 case kIemNativeVarKind_Invalid:
8874 case kIemNativeVarKind_End:
8875 break;
8876 }
8877 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8878 }
8879 }
8880# if 0 //def VBOX_STRICT
8881 iemNativeRegAssertSanity(pReNative);
8882# endif
8883 }
8884#else
8885 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8886#endif
8887
8888 /*
8889 * Make sure the argument variables are loaded into their respective registers.
8890 *
8891 * We can optimize this by ASSUMING that any register allocations are for
8892 * registers that have already been loaded and are ready. The previous step
8893 * saw to that.
8894 */
8895 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8896 {
8897 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8898 {
8899 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8900 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8901 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8902 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8903 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8904 else
8905 {
8906 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8907 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8908 {
8909 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8910 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8911 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8912 | RT_BIT_32(idxArgReg);
8913 pVar->idxReg = idxArgReg;
8914 }
8915 else
8916 {
8917 /* Use ARG0 as temp for stuff we need registers for. */
8918 switch (pVar->enmKind)
8919 {
8920 case kIemNativeVarKind_Stack:
8921 {
8922 uint8_t const idxStackSlot = pVar->idxStackSlot;
8923 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8924 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8925 continue;
8926 }
8927
8928 case kIemNativeVarKind_Immediate:
8929 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8930 continue;
8931
8932 case kIemNativeVarKind_VarRef:
8933 {
8934 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8935 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8936 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8937 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8938 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8939 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8940#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8941 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8942 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8943 if ( fSimdReg
8944 && idxRegOther != UINT8_MAX)
8945 {
8946 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8947 if (cbVar == sizeof(RTUINT128U))
8948 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8949 else
8950 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8951 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8952 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8953 }
8954 else
8955#endif
8956 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8957 {
8958 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8959 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8960 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8961 }
8962 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8963 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8964 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8965 continue;
8966 }
8967
8968 case kIemNativeVarKind_GstRegRef:
8969 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8970 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8971 continue;
8972
8973 case kIemNativeVarKind_Invalid:
8974 case kIemNativeVarKind_End:
8975 break;
8976 }
8977 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8978 }
8979 }
8980 }
8981#if 0 //def VBOX_STRICT
8982 iemNativeRegAssertSanity(pReNative);
8983#endif
8984 }
8985#ifdef VBOX_STRICT
8986 else
8987 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8988 {
8989 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8990 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8991 }
8992#endif
8993
8994 /*
8995 * Free all argument variables (simplified).
8996 * Their lifetime always expires with the call they are for.
8997 */
8998 /** @todo Make the python script check that arguments aren't used after
8999 * IEM_MC_CALL_XXXX. */
9000 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends
9001 * requiring an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call,
9002 * typically with an argument value. There is also some FPU stuff. */
9003 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
9004 {
9005 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
9006 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
9007
9008 /* no need to free registers: */
9009 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
9010 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
9011 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
9012 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
9013 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
9014 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
9015
9016 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
9017 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9018 iemNativeVarFreeStackSlots(pReNative, idxVar);
9019 }
9020 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9021
9022 /*
9023 * Flush volatile registers as we make the call.
9024 */
9025 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
9026
9027 return off;
9028}
9029
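/*
 * Editorial sketch, not part of the build: per the doc comment above, a
 * typical CIMPL emitter is left with loading the hidden argument(s) and
 * performing the call once this worker returns.  The call emitter name and
 * the pfnCImpl parameter below are assumptions for illustration only:
 *
 *     off = iemNativeEmitCallCommon(pReNative, off, cArgs, cHiddenArgs);
 *     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
 *                                       IEMNATIVE_REG_FIXED_PVMCPU);        // hidden arg #0: pVCpu (assumed)
 *     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);      // hypothetical call emitter/target
 */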
9030
9031
9032/*********************************************************************************************************************************
9033* TLB Lookup. *
9034*********************************************************************************************************************************/
9035
9036/**
9037 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
9038 */
9039DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
9040{
9041 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
9042 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
9043 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
9044 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
9045
9046 /* Do the lookup manually. */
9047 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
9048 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
9049 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
9050 if (RT_LIKELY(pTlbe->uTag == uTag))
9051 {
9052 /*
9053 * Check TLB page table level access flags.
9054 */
9055 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
9056 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
9057 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
9058 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
9059 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
9060 | IEMTLBE_F_PG_UNASSIGNED
9061 | IEMTLBE_F_PT_NO_ACCESSED
9062 | fNoWriteNoDirty | fNoUser);
9063 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
9064 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
9065 {
9066 /*
9067 * Return the address.
9068 */
9069 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
9070 if ((uintptr_t)pbAddr == uResult)
9071 return;
9072 RT_NOREF(cbMem);
9073 AssertFailed();
9074 }
9075 else
9076 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
9077 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
9078 }
9079 else
9080 AssertFailed();
9081 RT_BREAKPOINT();
9082}
9083
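/*
 * Editorial note: the uSegAndSizeAndAccess parameter above is simply the
 * mirror image of the unpacking at the top of the function; the emitting
 * side would pack it along these lines (sketch only):
 *
 *     uint32_t const uSegAndSizeAndAccess = (uint32_t)iSegReg             // bits  0..7, UINT8_MAX = flat access
 *                                         | ((uint32_t)cbMem   <<  8)     // bits  8..15
 *                                         | ((uint32_t)fAccess << 16);    // bits 16..31
 */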
9084/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
9085
9086
9087
9088/*********************************************************************************************************************************
9089* Recompiler Core. *
9090*********************************************************************************************************************************/
9091
9092/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
9093static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
9094{
9095 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
9096 pDis->cbCachedInstr += cbMaxRead;
9097 RT_NOREF(cbMinRead);
9098 return VERR_NO_DATA;
9099}
9100
9101
9102DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
9103{
9104 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
9105 {
9106#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
9107 ENTRY(fLocalForcedActions),
9108 ENTRY(iem.s.rcPassUp),
9109 ENTRY(iem.s.fExec),
9110 ENTRY(iem.s.pbInstrBuf),
9111 ENTRY(iem.s.uInstrBufPc),
9112 ENTRY(iem.s.GCPhysInstrBuf),
9113 ENTRY(iem.s.cbInstrBufTotal),
9114 ENTRY(iem.s.idxTbCurInstr),
9115#ifdef VBOX_WITH_STATISTICS
9116 ENTRY(iem.s.StatNativeTlbHitsForFetch),
9117 ENTRY(iem.s.StatNativeTlbHitsForStore),
9118 ENTRY(iem.s.StatNativeTlbHitsForStack),
9119 ENTRY(iem.s.StatNativeTlbHitsForMapped),
9120 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
9121 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
9122 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
9123 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
9124#endif
9125 ENTRY(iem.s.DataTlb.aEntries),
9126 ENTRY(iem.s.DataTlb.uTlbRevision),
9127 ENTRY(iem.s.DataTlb.uTlbPhysRev),
9128 ENTRY(iem.s.DataTlb.cTlbHits),
9129 ENTRY(iem.s.CodeTlb.aEntries),
9130 ENTRY(iem.s.CodeTlb.uTlbRevision),
9131 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
9132 ENTRY(iem.s.CodeTlb.cTlbHits),
9133 ENTRY(pVMR3),
9134 ENTRY(cpum.GstCtx.rax),
9135 ENTRY(cpum.GstCtx.ah),
9136 ENTRY(cpum.GstCtx.rcx),
9137 ENTRY(cpum.GstCtx.ch),
9138 ENTRY(cpum.GstCtx.rdx),
9139 ENTRY(cpum.GstCtx.dh),
9140 ENTRY(cpum.GstCtx.rbx),
9141 ENTRY(cpum.GstCtx.bh),
9142 ENTRY(cpum.GstCtx.rsp),
9143 ENTRY(cpum.GstCtx.rbp),
9144 ENTRY(cpum.GstCtx.rsi),
9145 ENTRY(cpum.GstCtx.rdi),
9146 ENTRY(cpum.GstCtx.r8),
9147 ENTRY(cpum.GstCtx.r9),
9148 ENTRY(cpum.GstCtx.r10),
9149 ENTRY(cpum.GstCtx.r11),
9150 ENTRY(cpum.GstCtx.r12),
9151 ENTRY(cpum.GstCtx.r13),
9152 ENTRY(cpum.GstCtx.r14),
9153 ENTRY(cpum.GstCtx.r15),
9154 ENTRY(cpum.GstCtx.es.Sel),
9155 ENTRY(cpum.GstCtx.es.u64Base),
9156 ENTRY(cpum.GstCtx.es.u32Limit),
9157 ENTRY(cpum.GstCtx.es.Attr),
9158 ENTRY(cpum.GstCtx.cs.Sel),
9159 ENTRY(cpum.GstCtx.cs.u64Base),
9160 ENTRY(cpum.GstCtx.cs.u32Limit),
9161 ENTRY(cpum.GstCtx.cs.Attr),
9162 ENTRY(cpum.GstCtx.ss.Sel),
9163 ENTRY(cpum.GstCtx.ss.u64Base),
9164 ENTRY(cpum.GstCtx.ss.u32Limit),
9165 ENTRY(cpum.GstCtx.ss.Attr),
9166 ENTRY(cpum.GstCtx.ds.Sel),
9167 ENTRY(cpum.GstCtx.ds.u64Base),
9168 ENTRY(cpum.GstCtx.ds.u32Limit),
9169 ENTRY(cpum.GstCtx.ds.Attr),
9170 ENTRY(cpum.GstCtx.fs.Sel),
9171 ENTRY(cpum.GstCtx.fs.u64Base),
9172 ENTRY(cpum.GstCtx.fs.u32Limit),
9173 ENTRY(cpum.GstCtx.fs.Attr),
9174 ENTRY(cpum.GstCtx.gs.Sel),
9175 ENTRY(cpum.GstCtx.gs.u64Base),
9176 ENTRY(cpum.GstCtx.gs.u32Limit),
9177 ENTRY(cpum.GstCtx.gs.Attr),
9178 ENTRY(cpum.GstCtx.rip),
9179 ENTRY(cpum.GstCtx.eflags),
9180 ENTRY(cpum.GstCtx.uRipInhibitInt),
9181 ENTRY(cpum.GstCtx.cr0),
9182 ENTRY(cpum.GstCtx.cr4),
9183 ENTRY(cpum.GstCtx.aXcr[0]),
9184 ENTRY(cpum.GstCtx.aXcr[1]),
9185#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9186 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
9187 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
9188 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
9189 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
9190 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
9191 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
9192 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
9193 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
9194 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
9195 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
9196 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
9197 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
9198 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
9199 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
9200 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
9201 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
9202 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
9203 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
9204 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
9205 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
9206 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
9207 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
9208 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
9209 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
9210 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
9211 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
9212 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
9213 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
9214 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
9215 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
9216 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
9217 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
9218#endif
9219#undef ENTRY
9220 };
9221#ifdef VBOX_STRICT
9222 static bool s_fOrderChecked = false;
9223 if (!s_fOrderChecked)
9224 {
9225 s_fOrderChecked = true;
9226 uint32_t offPrev = s_aMembers[0].off;
9227 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
9228 {
9229 Assert(s_aMembers[i].off > offPrev);
9230 offPrev = s_aMembers[i].off;
9231 }
9232 }
9233#endif
9234
9235 /*
9236 * Binary lookup.
9237 */
9238 unsigned iStart = 0;
9239 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9240 for (;;)
9241 {
9242 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9243 uint32_t const offCur = s_aMembers[iCur].off;
9244 if (off < offCur)
9245 {
9246 if (iCur != iStart)
9247 iEnd = iCur;
9248 else
9249 break;
9250 }
9251 else if (off > offCur)
9252 {
9253 if (iCur + 1 < iEnd)
9254 iStart = iCur + 1;
9255 else
9256 break;
9257 }
9258 else
9259 return s_aMembers[iCur].pszName;
9260 }
9261#ifdef VBOX_WITH_STATISTICS
9262 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9263 return "iem.s.acThreadedFuncStats[iFn]";
9264#endif
9265 return NULL;
9266}
9267
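/*
 * Editorial usage example, not part of the build:
 *
 *     iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip)); // -> "cpum.GstCtx.rip"
 *     iemNativeDbgVCpuOffsetToName(UINT32_MAX);                             // -> NULL (no matching member)
 */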
9268
9269DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9270{
9271 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9272#if defined(RT_ARCH_AMD64)
9273 static const char * const a_apszMarkers[] =
9274 {
9275 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9276 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9277 };
9278#endif
9279
9280 char szDisBuf[512];
9281 DISSTATE Dis;
9282 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9283 uint32_t const cNative = pTb->Native.cInstructions;
9284 uint32_t offNative = 0;
9285#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9286 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9287#endif
9288 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9289 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9290 : DISCPUMODE_64BIT;
9291#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9292 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9293#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9294 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9295#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9296# error "Port me"
9297#else
9298 csh hDisasm = ~(size_t)0;
9299# if defined(RT_ARCH_AMD64)
9300 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9301# elif defined(RT_ARCH_ARM64)
9302 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9303# else
9304# error "Port me"
9305# endif
9306 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9307
9308 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9309 //Assert(rcCs == CS_ERR_OK);
9310#endif
9311
9312 /*
9313 * Print TB info.
9314 */
9315 pHlp->pfnPrintf(pHlp,
9316 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9317 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9318 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9319 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9320#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9321 if (pDbgInfo && pDbgInfo->cEntries > 1)
9322 {
9323 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9324
9325 /*
9326 * This disassembly is driven by the debug info which follows the native
9327 * code and indicates where the next guest instruction starts, where
9328 * labels are, and such things.
9329 */
9330 uint32_t idxThreadedCall = 0;
9331 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9332 uint8_t idxRange = UINT8_MAX;
9333 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9334 uint32_t offRange = 0;
9335 uint32_t offOpcodes = 0;
9336 uint32_t const cbOpcodes = pTb->cbOpcodes;
9337 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9338 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9339 uint32_t iDbgEntry = 1;
9340 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9341
9342 while (offNative < cNative)
9343 {
9344 /* If we're at or have passed the point where the next chunk of debug
9345 info starts, process it. */
9346 if (offDbgNativeNext <= offNative)
9347 {
9348 offDbgNativeNext = UINT32_MAX;
9349 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9350 {
9351 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9352 {
9353 case kIemTbDbgEntryType_GuestInstruction:
9354 {
9355 /* Did the exec flag change? */
9356 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9357 {
9358 pHlp->pfnPrintf(pHlp,
9359 " fExec change %#08x -> %#08x %s\n",
9360 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9361 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9362 szDisBuf, sizeof(szDisBuf)));
9363 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9364 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9365 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9366 : DISCPUMODE_64BIT;
9367 }
9368
9369 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9370 where the compilation was aborted before the opcode was recorded and the actual
9371 instruction was translated to a threaded call. This may happen when we run out
9372 of ranges, or when some complicated interrupts/FFs are found to be pending or
9373 similar. So, we just deal with it here rather than in the compiler code as it
9374 is a lot simpler to do here. */
9375 if ( idxRange == UINT8_MAX
9376 || idxRange >= cRanges
9377 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9378 {
9379 idxRange += 1;
9380 if (idxRange < cRanges)
9381 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9382 else
9383 continue;
9384 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9385 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9386 + (pTb->aRanges[idxRange].idxPhysPage == 0
9387 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9388 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9389 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9390 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9391 pTb->aRanges[idxRange].idxPhysPage);
9392 GCPhysPc += offRange;
9393 }
9394
9395 /* Disassemble the instruction. */
9396 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9397 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9398 uint32_t cbInstr = 1;
9399 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9400 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9401 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9402 if (RT_SUCCESS(rc))
9403 {
9404 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9405 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9406 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9407 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9408
9409 static unsigned const s_offMarker = 55;
9410 static char const s_szMarker[] = " ; <--- guest";
9411 if (cch < s_offMarker)
9412 {
9413 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9414 cch = s_offMarker;
9415 }
9416 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9417 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9418
9419 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9420 }
9421 else
9422 {
9423 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9424 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9425 cbInstr = 1;
9426 }
9427 GCPhysPc += cbInstr;
9428 offOpcodes += cbInstr;
9429 offRange += cbInstr;
9430 continue;
9431 }
9432
9433 case kIemTbDbgEntryType_ThreadedCall:
9434 pHlp->pfnPrintf(pHlp,
9435 " Call #%u to %s (%u args) - %s\n",
9436 idxThreadedCall,
9437 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9438 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9439 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9440 idxThreadedCall++;
9441 continue;
9442
9443 case kIemTbDbgEntryType_GuestRegShadowing:
9444 {
9445 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9446 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9447 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9448 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s (Dirty: %RTbool)\n", pszGstReg,
9449 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev],
9450 RT_BOOL(pEntry->GuestRegShadowing.fDirty));
9451 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9452 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (Dirty: %RTbool)\n", pszGstReg,
9453 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9454 RT_BOOL(pEntry->GuestRegShadowing.fDirty));
9455 else
9456 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s, Dirty: %RTbool)\n", pszGstReg,
9457 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9458 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev],
9459 RT_BOOL(pEntry->GuestRegShadowing.fDirty));
9460 continue;
9461 }
9462
9463#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9464 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9465 {
9466 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9467 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9468 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9469 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9470 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9471 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9472 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9473 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9474 else
9475 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9476 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9477 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9478 continue;
9479 }
9480#endif
9481
9482 case kIemTbDbgEntryType_Label:
9483 {
9484 const char *pszName = "what_the_fudge";
9485 const char *pszComment = "";
9486 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9487 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9488 {
9489 case kIemNativeLabelType_Return: pszName = "Return"; break;
9490 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9491 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9492 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9493 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9494 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9495 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9496 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9497 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
9498 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9499 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9500 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9501 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9502 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9503 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9504 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9505 case kIemNativeLabelType_If:
9506 pszName = "If";
9507 fNumbered = true;
9508 break;
9509 case kIemNativeLabelType_Else:
9510 pszName = "Else";
9511 fNumbered = true;
9512 pszComment = " ; regs state restored pre-if-block";
9513 break;
9514 case kIemNativeLabelType_Endif:
9515 pszName = "Endif";
9516 fNumbered = true;
9517 break;
9518 case kIemNativeLabelType_CheckIrq:
9519 pszName = "CheckIrq_CheckVM";
9520 fNumbered = true;
9521 break;
9522 case kIemNativeLabelType_TlbLookup:
9523 pszName = "TlbLookup";
9524 fNumbered = true;
9525 break;
9526 case kIemNativeLabelType_TlbMiss:
9527 pszName = "TlbMiss";
9528 fNumbered = true;
9529 break;
9530 case kIemNativeLabelType_TlbDone:
9531 pszName = "TlbDone";
9532 fNumbered = true;
9533 break;
9534 case kIemNativeLabelType_Invalid:
9535 case kIemNativeLabelType_End:
9536 break;
9537 }
9538 if (fNumbered)
9539 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9540 else
9541 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9542 continue;
9543 }
9544
9545 case kIemTbDbgEntryType_NativeOffset:
9546 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9547 Assert(offDbgNativeNext > offNative);
9548 break;
9549
9550#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9551 case kIemTbDbgEntryType_DelayedPcUpdate:
9552 pHlp->pfnPrintf(pHlp,
9553 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9554 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9555 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9556 continue;
9557#endif
9558
9559 default:
9560 AssertFailed();
9561 }
9562 iDbgEntry++;
9563 break;
9564 }
9565 }
9566
9567 /*
9568 * Disassemble the next native instruction.
9569 */
9570 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9571# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9572 uint32_t cbInstr = sizeof(paNative[0]);
9573 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9574 if (RT_SUCCESS(rc))
9575 {
9576# if defined(RT_ARCH_AMD64)
9577 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9578 {
9579 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9580 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9581 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9582 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9583 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9584 uInfo & 0x8000 ? "recompiled" : "todo");
9585 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9586 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9587 else
9588 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9589 }
9590 else
9591# endif
9592 {
9593 const char *pszAnnotation = NULL;
9594# ifdef RT_ARCH_AMD64
9595 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9596 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9597 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9598 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9599 PCDISOPPARAM pMemOp;
9600 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9601 pMemOp = &Dis.Param1;
9602 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9603 pMemOp = &Dis.Param2;
9604 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9605 pMemOp = &Dis.Param3;
9606 else
9607 pMemOp = NULL;
9608 if ( pMemOp
9609 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9610 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9611 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9612 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9613
9614#elif defined(RT_ARCH_ARM64)
9615 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9616 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9617 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9618# else
9619# error "Port me"
9620# endif
9621 if (pszAnnotation)
9622 {
9623 static unsigned const s_offAnnotation = 55;
9624 size_t const cchAnnotation = strlen(pszAnnotation);
9625 size_t cchDis = strlen(szDisBuf);
9626 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9627 {
9628 if (cchDis < s_offAnnotation)
9629 {
9630 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9631 cchDis = s_offAnnotation;
9632 }
9633 szDisBuf[cchDis++] = ' ';
9634 szDisBuf[cchDis++] = ';';
9635 szDisBuf[cchDis++] = ' ';
9636 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9637 }
9638 }
9639 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9640 }
9641 }
9642 else
9643 {
9644# if defined(RT_ARCH_AMD64)
9645 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9646 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9647# elif defined(RT_ARCH_ARM64)
9648 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9649# else
9650# error "Port me"
9651# endif
9652 cbInstr = sizeof(paNative[0]);
9653 }
9654 offNative += cbInstr / sizeof(paNative[0]);
9655
9656# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9657 cs_insn *pInstr;
9658 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9659 (uintptr_t)pNativeCur, 1, &pInstr);
9660 if (cInstrs > 0)
9661 {
9662 Assert(cInstrs == 1);
9663 const char *pszAnnotation = NULL;
9664# if defined(RT_ARCH_ARM64)
9665 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9666 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9667 {
9668 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9669 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9670 char *psz = strchr(pInstr->op_str, '[');
9671 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9672 {
9673 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9674 int32_t off = -1;
9675 psz += 4;
9676 if (*psz == ']')
9677 off = 0;
9678 else if (*psz == ',')
9679 {
9680 psz = RTStrStripL(psz + 1);
9681 if (*psz == '#')
9682 off = RTStrToInt32(&psz[1]);
9683 /** @todo deal with index registers and LSL as well... */
9684 }
9685 if (off >= 0)
9686 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9687 }
9688 }
9689# endif
9690
9691 size_t const cchOp = strlen(pInstr->op_str);
9692# if defined(RT_ARCH_AMD64)
9693 if (pszAnnotation)
9694 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9695 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9696 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9697 else
9698 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9699 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9700
9701# else
9702 if (pszAnnotation)
9703 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9704 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9705 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9706 else
9707 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9708 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9709# endif
9710 offNative += pInstr->size / sizeof(*pNativeCur);
9711 cs_free(pInstr, cInstrs);
9712 }
9713 else
9714 {
9715# if defined(RT_ARCH_AMD64)
9716 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9717 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9718# else
9719 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9720# endif
9721 offNative++;
9722 }
9723# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9724 }
9725 }
9726 else
9727#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9728 {
9729 /*
9730 * No debug info, just disassemble the x86 code and then the native code.
9731 *
9732 * First the guest code:
9733 */
9734 for (unsigned i = 0; i < pTb->cRanges; i++)
9735 {
9736 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9737 + (pTb->aRanges[i].idxPhysPage == 0
9738 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9739 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9740 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9741 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9742 unsigned off = pTb->aRanges[i].offOpcodes;
9743 /** @todo this ain't working when crossing pages! */
9744 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9745 while (off < cbOpcodes)
9746 {
9747 uint32_t cbInstr = 1;
9748 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9749 &pTb->pabOpcodes[off], cbOpcodes - off,
9750 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9751 if (RT_SUCCESS(rc))
9752 {
9753 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9754 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9755 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9756 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9757 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9758 GCPhysPc += cbInstr;
9759 off += cbInstr;
9760 }
9761 else
9762 {
9763 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9764 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9765 break;
9766 }
9767 }
9768 }
9769
9770 /*
9771 * Then the native code:
9772 */
9773 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9774 while (offNative < cNative)
9775 {
9776 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9777# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9778 uint32_t cbInstr = sizeof(paNative[0]);
9779 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9780 if (RT_SUCCESS(rc))
9781 {
9782# if defined(RT_ARCH_AMD64)
9783 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9784 {
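 /* iemNativeEmitMarker produces a 7 byte NOP with a 32-bit payload in bytes 3..6; for
    threaded calls the low word holds the call index (bit 15 set when recompiled) and the
    high word the threaded function number, cf. the RT_MAKE_U32 in iemNativeRecompile. */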
9785 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9786 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9787 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9788 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9789 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9790 uInfo & 0x8000 ? "recompiled" : "todo");
9791 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9792 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9793 else
9794 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9795 }
9796 else
9797# endif
9798 {
9799# ifdef RT_ARCH_AMD64
9800 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9801 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9802 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9803 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9804# elif defined(RT_ARCH_ARM64)
9805 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9806 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9807 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9808# else
9809# error "Port me"
9810# endif
9811 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9812 }
9813 }
9814 else
9815 {
9816# if defined(RT_ARCH_AMD64)
9817 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9818 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9819# else
9820 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9821# endif
9822 cbInstr = sizeof(paNative[0]);
9823 }
9824 offNative += cbInstr / sizeof(paNative[0]);
9825
9826# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9827 cs_insn *pInstr;
9828 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9829 (uintptr_t)pNativeCur, 1, &pInstr);
9830 if (cInstrs > 0)
9831 {
9832 Assert(cInstrs == 1);
9833# if defined(RT_ARCH_AMD64)
9834 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9835 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9836# else
9837 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9838 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9839# endif
9840 offNative += pInstr->size / sizeof(*pNativeCur);
9841 cs_free(pInstr, cInstrs);
9842 }
9843 else
9844 {
9845# if defined(RT_ARCH_AMD64)
9846 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9847 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9848# else
9849 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9850# endif
9851 offNative++;
9852 }
9853# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9854 }
9855 }
9856
9857#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9858 /* Cleanup. */
9859 cs_close(&hDisasm);
9860#endif
9861}
9862
9863
9864/**
9865 * Recompiles the given threaded TB into a native one.
9866 *
9867 * In case of failure the translation block will be returned as-is.
9868 *
9869 * @returns pTb.
9870 * @param pVCpu The cross context virtual CPU structure of the calling
9871 * thread.
9872 * @param pTb The threaded translation block to recompile to native.
9873 */
9874DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9875{
9876 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9877
9878 /*
9879 * The first time through, we allocate the recompiler state; the other times
9880 * we just need to reset it before using it again.
9881 */
9882 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9883 if (RT_LIKELY(pReNative))
9884 iemNativeReInit(pReNative, pTb);
9885 else
9886 {
9887 pReNative = iemNativeInit(pVCpu, pTb);
9888 AssertReturn(pReNative, pTb);
9889 }
9890
9891#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9892 /*
9893 * First do liveness analysis. This is done backwards.
9894 */
9895 {
9896 uint32_t idxCall = pTb->Thrd.cCalls;
9897 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9898 { /* likely */ }
9899 else
9900 {
9901 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9902 while (idxCall > cAlloc)
9903 cAlloc *= 2;
9904 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9905 AssertReturn(pvNew, pTb);
9906 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9907 pReNative->cLivenessEntriesAlloc = cAlloc;
9908 }
9909 AssertReturn(idxCall > 0, pTb);
9910 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9911
9912 /* The initial (final) entry. */
9913 idxCall--;
9914 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
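 /* Seed for the backwards scan: nothing after the last call consumes guest state, so
    that entry starts out with everything marked as unused. */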
9915
9916 /* Loop backwards through the calls and fill in the other entries. */
9917 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9918 while (idxCall > 0)
9919 {
9920 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9921 if (pfnLiveness)
9922 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9923 else
9924 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9925 pCallEntry--;
9926 idxCall--;
9927 }
9928
9929# ifdef VBOX_WITH_STATISTICS
9930 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9931 to 'clobbered' rather than 'input'. */
9932 /** @todo */
9933# endif
9934 }
9935#endif
9936
9937 /*
9938 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9939 * so we can abort if an error happens.
9940 */
9941 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9942#ifdef LOG_ENABLED
9943 uint32_t const cCallsOrg = cCallsLeft;
9944#endif
9945 uint32_t off = 0;
9946 int rc = VINF_SUCCESS;
9947 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9948 {
9949 /*
9950 * Emit prolog code (fixed).
9951 */
9952 off = iemNativeEmitProlog(pReNative, off);
9953
9954 /*
9955 * Convert the calls to native code.
9956 */
9957#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9958 int32_t iGstInstr = -1;
9959#endif
9960#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9961 uint32_t cThreadedCalls = 0;
9962 uint32_t cRecompiledCalls = 0;
9963#endif
9964#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9965 uint32_t idxCurCall = 0;
9966#endif
9967 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9968 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
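 /* fExec starts out as the mode the TB was compiled for; the BltIn_CheckMode handling
    below refreshes it (when compiled in) so debug info and logging reflect mode changes. */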
9969 while (cCallsLeft-- > 0)
9970 {
9971 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9972#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9973 pReNative->idxCurCall = idxCurCall;
9974#endif
9975
9976 /*
9977 * Debug info, assembly markup and statistics.
9978 */
9979#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9980 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9981 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9982#endif
9983#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9984 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9985 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9986 {
9987 if (iGstInstr < (int32_t)pTb->cInstructions)
9988 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9989 else
9990 Assert(iGstInstr == pTb->cInstructions);
9991 iGstInstr = pCallEntry->idxInstr;
9992 }
9993 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9994#endif
9995#if defined(VBOX_STRICT)
9996 off = iemNativeEmitMarker(pReNative, off,
9997 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9998#endif
9999#if defined(VBOX_STRICT)
10000 iemNativeRegAssertSanity(pReNative);
10001#endif
10002#ifdef VBOX_WITH_STATISTICS
10003 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10004#endif
10005
10006 /*
10007 * Actual work.
10008 */
10009 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
10010 pfnRecom ? "(recompiled)" : "(todo)"));
10011 if (pfnRecom) /** @todo stats on this. */
10012 {
10013 off = pfnRecom(pReNative, off, pCallEntry);
10014 STAM_REL_STATS({cRecompiledCalls++;});
10015 }
10016 else
10017 {
10018 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10019 STAM_REL_STATS({cThreadedCalls++;});
10020 }
10021 Assert(off <= pReNative->cInstrBufAlloc);
10022 Assert(pReNative->cCondDepth == 0);
10023
10024#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10025 if (LogIs2Enabled())
10026 {
10027 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10028# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10029 static const char s_achState[] = "CUXI";
10030# else
10031 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
10032# endif
10033
10034 char szGpr[17];
10035 for (unsigned i = 0; i < 16; i++)
10036 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10037 szGpr[16] = '\0';
10038
10039 char szSegBase[X86_SREG_COUNT + 1];
10040 char szSegLimit[X86_SREG_COUNT + 1];
10041 char szSegAttrib[X86_SREG_COUNT + 1];
10042 char szSegSel[X86_SREG_COUNT + 1];
10043 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10044 {
10045 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10046 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10047 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10048 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10049 }
10050 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10051 = szSegSel[X86_SREG_COUNT] = '\0';
10052
10053 char szEFlags[8];
10054 for (unsigned i = 0; i < 7; i++)
10055 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10056 szEFlags[7] = '\0';
10057
10058 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10059 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10060 }
10061#endif
10062
10063 /*
10064 * Advance.
10065 */
10066 pCallEntry++;
10067#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10068 idxCurCall++;
10069#endif
10070 }
10071
10072 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10073 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10074 if (!cThreadedCalls)
10075 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10076
10077 /*
10078 * Emit the epilog code.
10079 */
10080 uint32_t idxReturnLabel;
10081 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10082
10083 /*
10084 * Generate special jump labels.
10085 */
10086 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10087 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10088 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10089 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10090
10091 /*
10092 * Generate simple TB tail labels that just call a helper with a pVCpu
10093 * argument and either return or longjmp/throw a non-zero status.
10094 *
10095 * The array entries must be ordered by enmLabel value so we can index
10096 * using fTailLabels bit numbers.
10097 */
10098 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10099 static struct
10100 {
10101 IEMNATIVELABELTYPE enmLabel;
10102 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10103 } const g_aSimpleTailLabels[] =
10104 {
10105 { kIemNativeLabelType_Invalid, NULL },
10106 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10107 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10108 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10109 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10110 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10111 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10112 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10113 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10114 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10115 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10116 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10117 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10118 };
10119 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10120 AssertCompile(kIemNativeLabelType_Invalid == 0);
10121 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
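 /* RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U is the mask of bits 1..LastSimple,
    i.e. all the simple tail labels except kIemNativeLabelType_Invalid (bit 0). */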
10122 if (fTailLabels)
10123 {
10124 do
10125 {
10126 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10127 fTailLabels &= ~RT_BIT_64(enmLabel);
10128 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10129
10130 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10131 Assert(idxLabel != UINT32_MAX);
10132 if (idxLabel != UINT32_MAX)
10133 {
10134 iemNativeLabelDefine(pReNative, idxLabel, off);
10135
10136 /* int pfnCallback(PVMCPUCC pVCpu) */
10137 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10138 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10139
10140 /* jump back to the return sequence. */
10141 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10142 }
10143
10144 } while (fTailLabels);
10145 }
10146 }
10147 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10148 {
10149 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10150 return pTb;
10151 }
10152 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10153 Assert(off <= pReNative->cInstrBufAlloc);
10154
10155 /*
10156 * Make sure all labels have been defined.
10157 */
10158 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10159#ifdef VBOX_STRICT
10160 uint32_t const cLabels = pReNative->cLabels;
10161 for (uint32_t i = 0; i < cLabels; i++)
10162 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10163#endif
10164
10165 /*
10166 * Allocate executable memory, copy over the code we've generated.
10167 */
10168 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10169 if (pTbAllocator->pDelayedFreeHead)
10170 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
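 /* Processing delayed TB frees first presumably lets the executable allocator reclaim
    their memory before the allocation attempt below. */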
10171
10172 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
10173 AssertReturn(paFinalInstrBuf, pTb);
10174 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10175
10176 /*
10177 * Apply fixups.
10178 */
10179 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10180 uint32_t const cFixups = pReNative->cFixups;
10181 for (uint32_t i = 0; i < cFixups; i++)
10182 {
10183 Assert(paFixups[i].off < off);
10184 Assert(paFixups[i].idxLabel < cLabels);
10185 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10186 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10187 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10188 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10189 switch (paFixups[i].enmType)
10190 {
10191#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10192 case kIemNativeFixupType_Rel32:
10193 Assert(paFixups[i].off + 4 <= off);
10194 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10195 continue;
10196
10197#elif defined(RT_ARCH_ARM64)
10198 case kIemNativeFixupType_RelImm26At0:
10199 {
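 /* B/BL: signed 26-bit displacement, counted in 4 byte instructions, in bits [25:0];
    the opcode bits [31:26] are preserved. */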
10200 Assert(paFixups[i].off < off);
10201 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10202 Assert(offDisp >= -262144 && offDisp < 262144);
10203 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10204 continue;
10205 }
10206
10207 case kIemNativeFixupType_RelImm19At5:
10208 {
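 /* B.cond/CBZ/CBNZ style: signed 19-bit instruction displacement in bits [23:5];
    bits [31:24] and [4:0] are preserved. */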
10209 Assert(paFixups[i].off < off);
10210 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10211 Assert(offDisp >= -262144 && offDisp < 262144);
10212 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10213 continue;
10214 }
10215
10216 case kIemNativeFixupType_RelImm14At5:
10217 {
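 /* TBZ/TBNZ style: signed 14-bit instruction displacement in bits [18:5];
    the remaining instruction bits are preserved. */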
10218 Assert(paFixups[i].off < off);
10219 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10220 Assert(offDisp >= -8192 && offDisp < 8192);
10221 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10222 continue;
10223 }
10224
10225#endif
10226 case kIemNativeFixupType_Invalid:
10227 case kIemNativeFixupType_End:
10228 break;
10229 }
10230 AssertFailed();
10231 }
10232
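 /* Mark the memory ready for execution; on strict W^X hosts this is presumably also
    where write access is dropped and the instruction cache gets flushed. */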
10233 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
10234 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10235
10236 /*
10237 * Convert the translation block.
10238 */
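 /* The threaded call table is no longer needed; free it and switch the TB over to
    carrying native instructions. */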
10239 RTMemFree(pTb->Thrd.paCalls);
10240 pTb->Native.paInstructions = paFinalInstrBuf;
10241 pTb->Native.cInstructions = off;
10242 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10243#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10244 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10245 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10246#endif
10247
10248 Assert(pTbAllocator->cThreadedTbs > 0);
10249 pTbAllocator->cThreadedTbs -= 1;
10250 pTbAllocator->cNativeTbs += 1;
10251 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10252
10253#ifdef LOG_ENABLED
10254 /*
10255 * Disassemble to the log if enabled.
10256 */
10257 if (LogIs3Enabled())
10258 {
10259 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10260 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
10261# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10262 RTLogFlush(NULL);
10263# endif
10264 }
10265#endif
10266 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10267
10268 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10269 return pTb;
10270}
10271