VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 104095

Last change on this file since 104095 was 104095, checked in by vboxsync, 8 months ago

VMM/IEM: Release statistics on how often the exec memory allocator fails to allocate a large enough instruction buffer, bugref:10370

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 464.9 KB
1/* $Id: IEMAllN8veRecompiler.cpp 104095 2024-03-27 15:46:10Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using the RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation lets us restrict page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
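/* Worked example (hypothetical request size): with the 128 byte unit above, a
 * 300 byte request is rounded up to RT_ALIGN_32(300, 128) = 384 bytes, i.e.
 * (300 + 127) >> 7 = 3 sub-allocation units, which is the run of clear bits
 * the bitmap scan in iemExecMemAllocatorAllocInChunkInt below will look for. */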
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity, they are allocated as one continuous
339 * chunk. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
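/* Hedged note on the darwin W^X handling above: the pages of a fresh
 * allocation are left RTMEM_PROT_READ | RTMEM_PROT_WRITE here so the
 * recompiler can emit code into them; iemExecMemAllocatorReadyForUse()
 * later flips them to RTMEM_PROT_READ | RTMEM_PROT_EXEC and invalidates
 * the instruction cache. */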
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
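/* Worked example of the scan above (hypothetical bitmap state): with
 * cReqUnits = 3 and the first bitmap word being 0x17 (bits 0, 1, 2 and 4 set),
 * ASMBitFirstClear returns 3, the inner loop stops at the set bit 4 with only
 * one clear bit found, ASMBitNextClear then resumes at bit 5, and bits 5..7
 * are clear, so they get marked allocated and the unit at index idxFirst + 5
 * is returned. */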
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510 /*
511 * Adjust the request size so it'll fit the allocator alignment/whatnot.
512 *
513 * For the RTHeapSimple allocator this means to follow the logic described
514 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
515 * existing chunks if we think we've got sufficient free memory around.
516 *
517 * While for the alternative one we just align it up to a whole unit size.
518 */
519#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
520 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
521#else
522 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
523#endif
524
525 for (unsigned iIteration = 0;; iIteration++)
526 {
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try pruning native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
568 return NULL;
569 }
570 }
571
572}
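/* Worked example for the RTHeapSimple path above (hypothetical sizes, assuming
 * the 32 byte block header set up in iemExecMemAllocatorGrow): a 100 byte
 * request becomes RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes, so the block
 * header (32) plus the user data (160) ends exactly on a 64 byte boundary and
 * the next allocation starts 64 byte aligned again. */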
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
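/* A minimal caller sketch (hypothetical and disabled; the real callers live in
 * the TB allocator code): allocate a buffer, emit native code into it, mark it
 * ready (read+exec on darwin) and eventually free it again. */
#if 0
static void iemExecMemAllocatorUsageSketch(PVMCPUCC pVCpu)
{
    uint32_t const cbInstrBuf = 4096;
    uint8_t * const pbInstrBuf = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbInstrBuf);
    if (pbInstrBuf)
    {
        /* ... emit recompiled instructions into pbInstrBuf ... */
        iemExecMemAllocatorReadyForUse(pVCpu, pbInstrBuf, cbInstrBuf);
        /* ... execute the translation block ... */
        iemExecMemAllocatorFree(pVCpu, pbInstrBuf, cbInstrBuf);
    }
}
#endif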
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one,
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
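/* Worked examples (hypothetical values): -8 fits the single byte form and is
 * emitted as 0x78 (((uint8_t)-8 & 0x3f) | 0x40), while 300 takes the two byte
 * form and is emitted as 0xac 0x02 (low 7 bits with the continuation flag,
 * then the remaining bits). */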
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
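/* Worked example (hypothetical value): 624485 (0x98765) is emitted as the
 * classic three byte sequence 0xe5 0x8e 0x26 - seven bits at a time, least
 * significant group first, continuation flag set on all but the last byte. */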
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
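/* Example (assuming the standard DWARF encoding where DW_CFA_def_cfa is 0x0c
 * and RBP is register 6): iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16) emits
 * the three bytes 0x0c 0x06 0x10, i.e. "CFA = RBP + 16", matching the AMD64
 * CIE generated further down. */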
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
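/* Example (assuming the standard DWARF encoding where DW_CFA_offset is the
 * 0x80 opcode group and RBP is register 6): iemDwarfPutCfaOffset(Ptr,
 * DWREG_AMD64_RBP, 2) emits 0x86 0x02, i.e. "RBP is saved at CFA + 2 * -8 =
 * CFA - 16" given the -8 data alignment factor in the CIE below. */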
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
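 /* Worked examples for the chunk size logic above (hypothetical sizes):
  * cbMax = 512M gives cbChunk = 64M and cMaxChunks = 8; cbMax = 100M gives
  * cbChunk = 100M / 4 = 25M, rounded up to the next power of two = 32M, with
  * cbMax itself rounded up to 128M, so cMaxChunks = 4. */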
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
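
/* A minimal usage sketch (hypothetical sizes; the actual call site and its
   configuration values live outside this file), requesting cbMax = _64M,
   cbInitial = _16M and the default chunk size (cbChunk = 0):

       int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0);
       AssertLogRelRCReturn(rc, rc);
 */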
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
1643 *
1644 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
1645 */
1646IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
1647{
1648 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
1649 iemRaiseSimdFpExceptionJmp(pVCpu);
1650 else
1651 iemRaiseUndefinedOpcodeJmp(pVCpu);
1652#ifndef _MSC_VER
1653 return VINF_IEM_RAISED_XCPT; /* not reached */
1654#endif
1655}
1656
1657
1658/**
1659 * Used by TB code when it wants to raise a \#NM.
1660 */
1661IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1662{
1663 iemRaiseDeviceNotAvailableJmp(pVCpu);
1664#ifndef _MSC_VER
1665 return VINF_IEM_RAISED_XCPT; /* not reached */
1666#endif
1667}
1668
1669
1670/**
1671 * Used by TB code when it wants to raise a \#GP(0).
1672 */
1673IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1674{
1675 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1676#ifndef _MSC_VER
1677 return VINF_IEM_RAISED_XCPT; /* not reached */
1678#endif
1679}
1680
1681
1682/**
1683 * Used by TB code when it wants to raise a \#MF.
1684 */
1685IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1686{
1687 iemRaiseMathFaultJmp(pVCpu);
1688#ifndef _MSC_VER
1689 return VINF_IEM_RAISED_XCPT; /* not reached */
1690#endif
1691}
1692
1693
1694/**
1695 * Used by TB code when it wants to raise a \#XF.
1696 */
1697IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1698{
1699 iemRaiseSimdFpExceptionJmp(pVCpu);
1700#ifndef _MSC_VER
1701 return VINF_IEM_RAISED_XCPT; /* not reached */
1702#endif
1703}
1704
1705
1706/**
1707 * Used by TB code when detecting opcode changes.
1708 * @see iemThreadeFuncWorkerObsoleteTb
1709 */
1710IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1711{
1712    /* We set fSafeToFree to false because we're being called in the context
1713       of a TB callback function, which for native TBs means we cannot release
1714       the executable memory until we've returned all the way back to iemTbExec,
1715       as that return path goes via the native code generated for the TB. */
1716 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1717 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1718 return VINF_IEM_REEXEC_BREAK;
1719}
1720
1721
1722/**
1723 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1724 */
1725IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1726{
1727 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1728 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1729 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1730 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1731 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1732 return VINF_IEM_REEXEC_BREAK;
1733}
1734
1735
1736/**
1737 * Used by TB code when we missed a PC check after a branch.
1738 */
1739IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1740{
1741 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1742 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1743 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1744 pVCpu->iem.s.pbInstrBuf));
1745 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1746 return VINF_IEM_REEXEC_BREAK;
1747}
1748
1749
1750
1751/*********************************************************************************************************************************
1752* Helpers: Segmented memory fetches and stores. *
1753*********************************************************************************************************************************/
1754
1755/**
1756 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1757 */
1758IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1759{
1760#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1761 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1762#else
1763 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1764#endif
1765}
1766
1767
1768/**
1769 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1770 * to 16 bits.
1771 */
1772IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1773{
1774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1775 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1776#else
1777 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1778#endif
1779}
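
/* Worked example of the cast chain above: a fetched byte of 0x80 is first
   interpreted as (int8_t)-128, sign-extended to (int16_t)0xFF80, and then
   zero-extended through uint16_t to the 64-bit return value 0x000000000000FF80,
   so the sign extension is confined to the low 16 bits while bits 16..63 stay zero. */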
1780
1781
1782/**
1783 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1784 * to 32 bits.
1785 */
1786IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1787{
1788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1789 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1790#else
1791 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1792#endif
1793}
1794
1795/**
1796 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1797 * to 64 bits.
1798 */
1799IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1800{
1801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1802 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1803#else
1804 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1805#endif
1806}
1807
1808
1809/**
1810 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1824 * to 32 bits.
1825 */
1826IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1827{
1828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1829 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1830#else
1831 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1832#endif
1833}
1834
1835
1836/**
1837 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1838 * to 64 bits.
1839 */
1840IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1841{
1842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1843 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1844#else
1845 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1846#endif
1847}
1848
1849
1850/**
1851 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1852 */
1853IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1854{
1855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1856 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1857#else
1858 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1859#endif
1860}
1861
1862
1863/**
1864 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1865 * to 64 bits.
1866 */
1867IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1868{
1869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1870 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1871#else
1872 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1873#endif
1874}
1875
1876
1877/**
1878 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1883 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1884#else
1885 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1886#endif
1887}
1888
1889
1890#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1891/**
1892 * Used by TB code to load 128-bit data w/ segmentation.
1893 */
1894IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1895{
1896#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1897 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1898#else
1899 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1900#endif
1901}
1902
1903
1904/**
1905 * Used by TB code to load 128-bit data w/ segmentation, enforcing SSE alignment.
1906 */
1907IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1908{
1909#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1910 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1911#else
1912 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1913#endif
1914}
1915
1916
1917/**
1918 * Used by TB code to load 128-bit data w/ segmentation, without alignment checking.
1919 */
1920IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1921{
1922#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1923 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1924#else
1925 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1926#endif
1927}
1928
1929
1930/**
1931 * Used by TB code to load 256-bit data w/ segmentation, without alignment checking.
1932 */
1933IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1934{
1935#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1936 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1937#else
1938 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1939#endif
1940}
1941
1942
1943/**
1944 * Used by TB code to load 256-bit data w/ segmentation, enforcing AVX alignment.
1945 */
1946IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1947{
1948#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1949 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1950#else
1951 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1952#endif
1953}
1954#endif
1955
1956
1957/**
1958 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1959 */
1960IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1961{
1962#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1963 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1964#else
1965 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1966#endif
1967}
1968
1969
1970/**
1971 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1972 */
1973IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1974{
1975#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1976 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1977#else
1978 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1979#endif
1980}
1981
1982
1983/**
1984 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1985 */
1986IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1987{
1988#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1989 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1990#else
1991 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1992#endif
1993}
1994
1995
1996/**
1997 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1998 */
1999IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
2000{
2001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2002 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
2003#else
2004 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
2005#endif
2006}
2007
2008
2009#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2010/**
2011 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing SSE alignment.
2012 */
2013IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2014{
2015#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2016 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2017#else
2018 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2019#endif
2020}
2021
2022
2023/**
2024 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checking.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2029 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2030#else
2031 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checking.
2038 */
2039IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2040{
2041#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2042 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2043#else
2044 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2045#endif
2046}
2047
2048
2049/**
2050 * Used by TB code to store unsigned 256-bit data w/ segmentation, enforcing AVX alignment.
2051 */
2052IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2053{
2054#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2055 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2056#else
2057    iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2058#endif
2059}
2060#endif
2061
2062
2063
2064/**
2065 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
2066 */
2067IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2068{
2069#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2070 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2071#else
2072 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2073#endif
2074}
2075
2076
2077/**
2078 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
2079 */
2080IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2081{
2082#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2083 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2084#else
2085 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2086#endif
2087}
2088
2089
2090/**
2091 * Used by TB code to store a 32-bit selector value onto a generic stack.
2092 *
2093 * Intel CPUs don't write the whole dword in this case, thus the special function.
2094 */
2095IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2096{
2097#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2098 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2099#else
2100 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2101#endif
2102}
2103
2104
2105/**
2106 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
2107 */
2108IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2109{
2110#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2111 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2112#else
2113 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2114#endif
2115}
2116
2117
2118/**
2119 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2120 */
2121IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2122{
2123#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2124 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2125#else
2126 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2127#endif
2128}
2129
2130
2131/**
2132 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2133 */
2134IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2135{
2136#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2137 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2138#else
2139 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2140#endif
2141}
2142
2143
2144/**
2145 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2146 */
2147IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2148{
2149#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2150 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2151#else
2152 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2153#endif
2154}
2155
2156
2157
2158/*********************************************************************************************************************************
2159* Helpers: Flat memory fetches and stores. *
2160*********************************************************************************************************************************/
2161
2162/**
2163 * Used by TB code to load unsigned 8-bit data w/ flat address.
2164 * @note Zero extending the value to 64-bit to simplify assembly.
2165 */
2166IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2167{
2168#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2169 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2170#else
2171 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2172#endif
2173}
2174
2175
2176/**
2177 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2178 * to 16 bits.
2179 * @note Zero extending the value to 64-bit to simplify assembly.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2182{
2183#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2184 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2185#else
2186 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2187#endif
2188}
2189
2190
2191/**
2192 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2193 * to 32 bits.
2194 * @note Zero extending the value to 64-bit to simplify assembly.
2195 */
2196IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2197{
2198#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2199 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2200#else
2201 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2202#endif
2203}
2204
2205
2206/**
2207 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2208 * to 64 bits.
2209 */
2210IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2211{
2212#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2213 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2214#else
2215 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2216#endif
2217}
2218
2219
2220/**
2221 * Used by TB code to load unsigned 16-bit data w/ flat address.
2222 * @note Zero extending the value to 64-bit to simplify assembly.
2223 */
2224IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2225{
2226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2227 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2228#else
2229 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2230#endif
2231}
2232
2233
2234/**
2235 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2236 * to 32 bits.
2237 * @note Zero extending the value to 64-bit to simplify assembly.
2238 */
2239IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2240{
2241#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2242 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2243#else
2244 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2245#endif
2246}
2247
2248
2249/**
2250 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2251 * to 64 bits.
2252 * @note Zero extending the value to 64-bit to simplify assembly.
2253 */
2254IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2255{
2256#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2257 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2258#else
2259 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2260#endif
2261}
2262
2263
2264/**
2265 * Used by TB code to load unsigned 32-bit data w/ flat address.
2266 * @note Zero extending the value to 64-bit to simplify assembly.
2267 */
2268IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2269{
2270#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2271 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2272#else
2273 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2274#endif
2275}
2276
2277
2278/**
2279 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2280 * to 64 bits.
2281 * @note Zero extending the value to 64-bit to simplify assembly.
2282 */
2283IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2284{
2285#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2286 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2287#else
2288 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2289#endif
2290}
2291
2292
2293/**
2294 * Used by TB code to load unsigned 64-bit data w/ flat address.
2295 */
2296IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2297{
2298#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2299 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2300#else
2301 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2302#endif
2303}
2304
2305
2306#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2307/**
2308 * Used by TB code to load unsigned 128-bit data w/ flat address.
2309 */
2310IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2311{
2312#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2313 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2314#else
2315 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2316#endif
2317}
2318
2319
2320/**
2321 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing SSE alignment.
2322 */
2323IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2324{
2325#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2326 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2327#else
2328 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2329#endif
2330}
2331
2332
2333/**
2334 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checking.
2335 */
2336IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2337{
2338#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2339 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2340#else
2341 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2342#endif
2343}
2344
2345
2346/**
2347 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checking.
2348 */
2349IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2350{
2351#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2352 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2353#else
2354 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2355#endif
2356}
2357
2358
2359/**
2360 * Used by TB code to load unsigned 256-bit data w/ flat address, enforcing AVX alignment.
2361 */
2362IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2363{
2364#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2365 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2366#else
2367 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2368#endif
2369}
2370#endif
2371
2372
2373/**
2374 * Used by TB code to store unsigned 8-bit data w/ flat address.
2375 */
2376IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2377{
2378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2379 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2380#else
2381 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2382#endif
2383}
2384
2385
2386/**
2387 * Used by TB code to store unsigned 16-bit data w/ flat address.
2388 */
2389IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2390{
2391#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2392 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2393#else
2394 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2395#endif
2396}
2397
2398
2399/**
2400 * Used by TB code to store unsigned 32-bit data w/ flat address.
2401 */
2402IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2403{
2404#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2405 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2406#else
2407 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2408#endif
2409}
2410
2411
2412/**
2413 * Used by TB code to store unsigned 64-bit data w/ flat address.
2414 */
2415IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2416{
2417#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2418 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2419#else
2420 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2421#endif
2422}
2423
2424
2425#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2426/**
2427 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing SSE alignment.
2428 */
2429IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2432 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2433#else
2434 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checking.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2445 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2446#else
2447 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checking.
2454 */
2455IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2456{
2457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2458 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2459#else
2460 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
2461#endif
2462}
2463
2464
2465/**
2466 * Used by TB code to store unsigned 256-bit data w/ flat address, enforcing AVX alignment.
2467 */
2468IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2469{
2470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2471 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2472#else
2473 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
2474#endif
2475}
2476#endif
2477
2478
2479
2480/**
2481 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2482 */
2483IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2484{
2485#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2486 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2487#else
2488 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2489#endif
2490}
2491
2492
2493/**
2494 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2495 */
2496IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2497{
2498#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2499 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2500#else
2501 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2502#endif
2503}
2504
2505
2506/**
2507 * Used by TB code to store a segment selector value onto a flat stack.
2508 *
2509 * Intel CPUs don't write the whole dword in this case, thus the special function.
2510 */
2511IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2512{
2513#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2514 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2515#else
2516 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2517#endif
2518}
2519
2520
2521/**
2522 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2523 */
2524IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2525{
2526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2527 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2528#else
2529 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2530#endif
2531}
2532
2533
2534/**
2535 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2536 */
2537IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2538{
2539#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2540 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2541#else
2542 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2543#endif
2544}
2545
2546
2547/**
2548 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2549 */
2550IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2551{
2552#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2553 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2554#else
2555 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2556#endif
2557}
2558
2559
2560/**
2561 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2562 */
2563IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2564{
2565#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2566 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2567#else
2568 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2569#endif
2570}
2571
2572
2573
2574/*********************************************************************************************************************************
2575* Helpers: Segmented memory mapping. *
2576*********************************************************************************************************************************/
2577
2578/**
2579 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2580 * segmentation.
2581 */
2582IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2583 RTGCPTR GCPtrMem, uint8_t iSegReg))
2584{
2585#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2586 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2587#else
2588 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2589#endif
2590}
2591
2592
2593/**
2594 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2595 */
2596IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2597 RTGCPTR GCPtrMem, uint8_t iSegReg))
2598{
2599#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2600 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2601#else
2602 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2603#endif
2604}
2605
2606
2607/**
2608 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2609 */
2610IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2611 RTGCPTR GCPtrMem, uint8_t iSegReg))
2612{
2613#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2614 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2615#else
2616 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2617#endif
2618}
2619
2620
2621/**
2622 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2623 */
2624IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2625 RTGCPTR GCPtrMem, uint8_t iSegReg))
2626{
2627#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2628 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2629#else
2630 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2631#endif
2632}
2633
2634
2635/**
2636 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2637 * segmentation.
2638 */
2639IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2640 RTGCPTR GCPtrMem, uint8_t iSegReg))
2641{
2642#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2643 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2644#else
2645 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2646#endif
2647}
2648
2649
2650/**
2651 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2652 */
2653IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2654 RTGCPTR GCPtrMem, uint8_t iSegReg))
2655{
2656#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2657 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2658#else
2659 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2660#endif
2661}
2662
2663
2664/**
2665 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2666 */
2667IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2668 RTGCPTR GCPtrMem, uint8_t iSegReg))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2672#else
2673 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2682 RTGCPTR GCPtrMem, uint8_t iSegReg))
2683{
2684#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2685 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2686#else
2687 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2688#endif
2689}
2690
2691
2692/**
2693 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2694 * segmentation.
2695 */
2696IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2697 RTGCPTR GCPtrMem, uint8_t iSegReg))
2698{
2699#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2700 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2701#else
2702 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2703#endif
2704}
2705
2706
2707/**
2708 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2709 */
2710IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2711 RTGCPTR GCPtrMem, uint8_t iSegReg))
2712{
2713#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2714 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2715#else
2716 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2717#endif
2718}
2719
2720
2721/**
2722 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2723 */
2724IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2725 RTGCPTR GCPtrMem, uint8_t iSegReg))
2726{
2727#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2728 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2729#else
2730 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2731#endif
2732}
2733
2734
2735/**
2736 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2737 */
2738IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2739 RTGCPTR GCPtrMem, uint8_t iSegReg))
2740{
2741#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2742 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2743#else
2744 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2745#endif
2746}
2747
2748
2749/**
2750 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2751 * segmentation.
2752 */
2753IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2754 RTGCPTR GCPtrMem, uint8_t iSegReg))
2755{
2756#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2757 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2758#else
2759 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2760#endif
2761}
2762
2763
2764/**
2765 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2766 */
2767IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2768 RTGCPTR GCPtrMem, uint8_t iSegReg))
2769{
2770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2771 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2772#else
2773 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2774#endif
2775}
2776
2777
2778/**
2779 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2780 */
2781IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2782 RTGCPTR GCPtrMem, uint8_t iSegReg))
2783{
2784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2785 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2786#else
2787 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2788#endif
2789}
2790
2791
2792/**
2793 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2794 */
2795IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2796 RTGCPTR GCPtrMem, uint8_t iSegReg))
2797{
2798#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2799 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2800#else
2801 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2802#endif
2803}
2804
2805
2806/**
2807 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2808 */
2809IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2810 RTGCPTR GCPtrMem, uint8_t iSegReg))
2811{
2812#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2813 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2814#else
2815 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2816#endif
2817}
2818
2819
2820/**
2821 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2822 */
2823IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2824 RTGCPTR GCPtrMem, uint8_t iSegReg))
2825{
2826#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2827 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2828#else
2829 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2830#endif
2831}
2832
2833
2834/**
2835 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2836 * segmentation.
2837 */
2838IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2839 RTGCPTR GCPtrMem, uint8_t iSegReg))
2840{
2841#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2842 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2843#else
2844 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2845#endif
2846}
2847
2848
2849/**
2850 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2851 */
2852IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2853 RTGCPTR GCPtrMem, uint8_t iSegReg))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2857#else
2858 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2867 RTGCPTR GCPtrMem, uint8_t iSegReg))
2868{
2869#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2870 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2871#else
2872 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2873#endif
2874}
2875
2876
2877/**
2878 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2879 */
2880IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2881 RTGCPTR GCPtrMem, uint8_t iSegReg))
2882{
2883#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2884 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2885#else
2886 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2887#endif
2888}
2889
2890
2891/*********************************************************************************************************************************
2892* Helpers: Flat memory mapping. *
2893*********************************************************************************************************************************/
2894
2895/**
2896 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2897 * address.
2898 */
2899IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2900{
2901#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2902 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2903#else
2904 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2905#endif
2906}
2907
2908
2909/**
2910 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2911 */
2912IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2913{
2914#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2915 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2916#else
2917 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2918#endif
2919}
2920
2921
2922/**
2923 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2924 */
2925IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2926{
2927#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2928 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2929#else
2930 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2931#endif
2932}
2933
2934
2935/**
2936 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2937 */
2938IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2939{
2940#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2941 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2942#else
2943 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2944#endif
2945}
2946
2947
2948/**
2949 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2950 * address.
2951 */
2952IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2953{
2954#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2955 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2956#else
2957 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2958#endif
2959}
2960
2961
2962/**
2963 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2964 */
2965IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2966{
2967#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2968 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2969#else
2970 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2971#endif
2972}
2973
2974
2975/**
2976 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2977 */
2978IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2979{
2980#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2981 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2982#else
2983 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2984#endif
2985}
2986
2987
2988/**
2989 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2990 */
2991IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2992{
2993#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2994 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2995#else
2996 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2997#endif
2998}
2999
3000
3001/**
3002 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
3003 * address.
3004 */
3005IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3006{
3007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3008 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3009#else
3010 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3011#endif
3012}
3013
3014
3015/**
3016 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
3017 */
3018IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3019{
3020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3021 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3022#else
3023 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3024#endif
3025}
3026
3027
3028/**
3029 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
3030 */
3031IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3032{
3033#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3034 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3035#else
3036 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3037#endif
3038}
3039
3040
3041/**
3042 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
3043 */
3044IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3045{
3046#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3047 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3048#else
3049 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3050#endif
3051}
3052
3053
3054/**
3055 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
3056 * address.
3057 */
3058IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3059{
3060#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3061 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3062#else
3063 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3064#endif
3065}
3066
3067
3068/**
3069 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
3070 */
3071IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3072{
3073#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3074 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3075#else
3076 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3077#endif
3078}
3079
3080
3081/**
3082 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
3083 */
3084IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3085{
3086#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3087 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3088#else
3089 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3090#endif
3091}
3092
3093
3094/**
3095 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3096 */
3097IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3098{
3099#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3100 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3101#else
3102 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3103#endif
3104}
3105
3106
3107/**
3108 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3109 */
3110IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3111{
3112#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3113 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3114#else
3115 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3116#endif
3117}
3118
3119
3120/**
3121 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3122 */
3123IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3124{
3125#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3126 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3127#else
3128 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3129#endif
3130}
3131
3132
3133/**
3134 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3135 * address.
3136 */
3137IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3138{
3139#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3140 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3141#else
3142 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3143#endif
3144}
3145
3146
3147/**
3148 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3149 */
3150IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3151{
3152#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3153 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3154#else
3155 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3156#endif
3157}
3158
3159
3160/**
3161 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3162 */
3163IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3164{
3165#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3166 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3167#else
3168 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3169#endif
3170}
3171
3172
3173/**
3174 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3175 */
3176IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3177{
3178#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3179 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3180#else
3181 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3182#endif
3183}
3184
3185
3186/*********************************************************************************************************************************
3187* Helpers: Commit, rollback & unmap *
3188*********************************************************************************************************************************/
3189
3190/**
3191 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3192 */
3193IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3194{
3195 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3196}
3197
3198
3199/**
3200 * Used by TB code to commit and unmap a read-write memory mapping.
3201 */
3202IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3203{
3204 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3205}
3206
3207
3208/**
3209 * Used by TB code to commit and unmap a write-only memory mapping.
3210 */
3211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3212{
3213 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3214}
3215
3216
3217/**
3218 * Used by TB code to commit and unmap a read-only memory mapping.
3219 */
3220IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3221{
3222 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3223}
3224
3225
3226/**
3227 * Reinitializes the native recompiler state.
3228 *
3229 * Called before starting a new recompile job.
3230 */
3231static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3232{
3233 pReNative->cLabels = 0;
3234 pReNative->bmLabelTypes = 0;
3235 pReNative->cFixups = 0;
3236#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3237 pReNative->pDbgInfo->cEntries = 0;
3238#endif
3239 pReNative->pTbOrg = pTb;
3240 pReNative->cCondDepth = 0;
3241 pReNative->uCondSeqNo = 0;
3242 pReNative->uCheckIrqSeqNo = 0;
3243 pReNative->uTlbSeqNo = 0;
3244
3245#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3246 pReNative->Core.offPc = 0;
3247 pReNative->Core.cInstrPcUpdateSkipped = 0;
3248#endif
3249#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3250 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3251#endif
3252 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3253#if IEMNATIVE_HST_GREG_COUNT < 32
3254 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3255#endif
3256 ;
3257 pReNative->Core.bmHstRegsWithGstShadow = 0;
3258 pReNative->Core.bmGstRegShadows = 0;
3259#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3260 pReNative->Core.bmGstRegShadowDirty = 0;
3261#endif
3262 pReNative->Core.bmVars = 0;
3263 pReNative->Core.bmStack = 0;
3264 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3265 pReNative->Core.u64ArgVars = UINT64_MAX;
3266
3267 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
3268 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3269 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3270 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3271 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3272 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3273 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3274 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3275 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3276 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3277 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3278 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3279 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3280 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3281 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3282 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3283 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3284 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
3285
3286 /* Full host register reinit: */
3287 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3288 {
3289 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3290 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3291 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3292 }
3293
3294 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3295 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3296#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3297 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3298#endif
3299#ifdef IEMNATIVE_REG_FIXED_TMP0
3300 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3301#endif
3302#ifdef IEMNATIVE_REG_FIXED_TMP1
3303 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3304#endif
3305#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3306 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3307#endif
3308 );
3309 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3310 {
3311 fRegs &= ~RT_BIT_32(idxReg);
3312        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3313 }
3314
3315 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3316#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3317 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3318#endif
3319#ifdef IEMNATIVE_REG_FIXED_TMP0
3320 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3321#endif
3322#ifdef IEMNATIVE_REG_FIXED_TMP1
3323 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3324#endif
3325#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3326 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3327#endif
3328
3329#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3330 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3331# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3332 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3333# endif
3334 ;
3335 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3336 pReNative->Core.bmGstSimdRegShadows = 0;
3337 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3338 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3339
3340 /* Full host register reinit: */
3341 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3342 {
3343 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3344 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3345 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3346 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3347 }
3348
3349 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
3350 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3351 {
3352 fRegs &= ~RT_BIT_32(idxReg);
3353 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3354 }
3355
3356#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3357 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3358#endif
3359
3360#endif
3361
3362 return pReNative;
3363}
3364
3365
3366/**
3367 * Allocates and initializes the native recompiler state.
3368 *
3369 * This is called the first time an EMT wants to recompile something.
3370 *
3371 * @returns Pointer to the new recompiler state.
3372 * @param pVCpu The cross context virtual CPU structure of the calling
3373 * thread.
3374 * @param pTb The TB that's about to be recompiled.
3375 * @thread EMT(pVCpu)
3376 */
3377static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3378{
3379 VMCPU_ASSERT_EMT(pVCpu);
3380
3381 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3382 AssertReturn(pReNative, NULL);
3383
3384 /*
3385 * Try allocate all the buffers and stuff we need.
3386 */
3387 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3388 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3389 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3390#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3391 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3392#endif
3393 if (RT_LIKELY( pReNative->pInstrBuf
3394 && pReNative->paLabels
3395 && pReNative->paFixups)
3396#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3397 && pReNative->pDbgInfo
3398#endif
3399 )
3400 {
3401 /*
3402 * Set the buffer & array sizes on success.
3403 */
3404 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3405 pReNative->cLabelsAlloc = _8K;
3406 pReNative->cFixupsAlloc = _16K;
3407#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3408 pReNative->cDbgInfoAlloc = _16K;
3409#endif
3410
3411 /* Other constant stuff: */
3412 pReNative->pVCpu = pVCpu;
3413
3414 /*
3415 * Done, just need to save it and reinit it.
3416 */
3417 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3418 return iemNativeReInit(pReNative, pTb);
3419 }
3420
3421 /*
3422 * Failed. Cleanup and return.
3423 */
3424 AssertFailed();
3425 RTMemFree(pReNative->pInstrBuf);
3426 RTMemFree(pReNative->paLabels);
3427 RTMemFree(pReNative->paFixups);
3428#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3429 RTMemFree(pReNative->pDbgInfo);
3430#endif
3431 RTMemFree(pReNative);
3432 return NULL;
3433}
3434
3435
3436/**
3437 * Creates a label
3438 *
3439 * If the label does not yet have a defined position,
3440 * call iemNativeLabelDefine() later to set it.
3441 *
3442 * @returns Label ID. Throws VBox status code on failure, so no need to check
3443 * the return value.
3444 * @param pReNative The native recompile state.
3445 * @param enmType The label type.
3446 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3447 * label is not yet defined (default).
3448 * @param   uData       Data associated with the label. Only applicable to
3449 *                      certain types of labels. Default is zero.
3450 */
3451DECL_HIDDEN_THROW(uint32_t)
3452iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3453 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3454{
3455 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3456
3457 /*
3458 * Locate existing label definition.
3459 *
3460 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3461 * and uData is zero.
3462 */
3463 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3464 uint32_t const cLabels = pReNative->cLabels;
3465 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3466#ifndef VBOX_STRICT
3467 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3468 && offWhere == UINT32_MAX
3469 && uData == 0
3470#endif
3471 )
3472 {
3473#ifndef VBOX_STRICT
3474 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3475 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3476 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3477 if (idxLabel < pReNative->cLabels)
3478 return idxLabel;
3479#else
3480 for (uint32_t i = 0; i < cLabels; i++)
3481 if ( paLabels[i].enmType == enmType
3482 && paLabels[i].uData == uData)
3483 {
3484 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3485 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3486 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3487 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3488 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3489 return i;
3490 }
3491 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3492 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3493#endif
3494 }
3495
3496 /*
3497 * Make sure we've got room for another label.
3498 */
3499 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3500 { /* likely */ }
3501 else
3502 {
3503 uint32_t cNew = pReNative->cLabelsAlloc;
3504 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3505 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3506 cNew *= 2;
3507        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3508 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3509 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3510 pReNative->paLabels = paLabels;
3511 pReNative->cLabelsAlloc = cNew;
3512 }
3513
3514 /*
3515 * Define a new label.
3516 */
3517 paLabels[cLabels].off = offWhere;
3518 paLabels[cLabels].enmType = enmType;
3519 paLabels[cLabels].uData = uData;
3520 pReNative->cLabels = cLabels + 1;
3521
3522 Assert((unsigned)enmType < 64);
3523 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3524
3525 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3526 {
3527 Assert(uData == 0);
3528 pReNative->aidxUniqueLabels[enmType] = cLabels;
3529 }
3530
3531 if (offWhere != UINT32_MAX)
3532 {
3533#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3534 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3535 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3536#endif
3537 }
3538 return cLabels;
3539}
3540
3541
3542/**
3543 * Defines the location of an existing label.
3544 *
3545 * @param pReNative The native recompile state.
3546 * @param idxLabel The label to define.
3547 * @param offWhere The position.
3548 */
3549DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3550{
3551 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3552 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3553 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3554 pLabel->off = offWhere;
3555#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3556 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3557 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3558#endif
3559}
3560
3561
3562/**
3563 * Looks up a label.
3564 *
3565 * @returns Label ID if found, UINT32_MAX if not.
3566 */
3567static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3568 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3569{
3570 Assert((unsigned)enmType < 64);
3571 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3572 {
3573 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3574 return pReNative->aidxUniqueLabels[enmType];
3575
3576 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3577 uint32_t const cLabels = pReNative->cLabels;
3578 for (uint32_t i = 0; i < cLabels; i++)
3579 if ( paLabels[i].enmType == enmType
3580 && paLabels[i].uData == uData
3581 && ( paLabels[i].off == offWhere
3582 || offWhere == UINT32_MAX
3583 || paLabels[i].off == UINT32_MAX))
3584 return i;
3585 }
3586 return UINT32_MAX;
3587}
3588
3589
3590/**
3591 * Adds a fixup.
3592 *
3593 * @throws VBox status code (int) on failure.
3594 * @param pReNative The native recompile state.
3595 * @param offWhere The instruction offset of the fixup location.
3596 * @param idxLabel The target label ID for the fixup.
3597 * @param enmType The fixup type.
3598 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3599 */
3600DECL_HIDDEN_THROW(void)
3601iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3602 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3603{
3604 Assert(idxLabel <= UINT16_MAX);
3605 Assert((unsigned)enmType <= UINT8_MAX);
3606#ifdef RT_ARCH_ARM64
3607 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3608 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3609 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3610#endif
3611
3612 /*
3613 * Make sure we've room.
3614 */
3615 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3616 uint32_t const cFixups = pReNative->cFixups;
3617 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3618 { /* likely */ }
3619 else
3620 {
3621 uint32_t cNew = pReNative->cFixupsAlloc;
3622 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3623 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3624 cNew *= 2;
3625 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3626 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3627 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3628 pReNative->paFixups = paFixups;
3629 pReNative->cFixupsAlloc = cNew;
3630 }
3631
3632 /*
3633 * Add the fixup.
3634 */
3635 paFixups[cFixups].off = offWhere;
3636 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3637 paFixups[cFixups].enmType = enmType;
3638 paFixups[cFixups].offAddend = offAddend;
3639 pReNative->cFixups = cFixups + 1;
3640}
3641
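/*
 * Illustrative sketch only (kept out of the build): how the label and fixup
 * helpers above are typically combined.  A label is forward declared, a branch
 * is emitted together with a fixup referencing it, and the label position is
 * bound once the target offset is known.  The label type, fixup type and the
 * "emit branch" step below are placeholders; the real emitters use
 * architecture specific instructions and fixup kinds.
 */
#if 0
static uint32_t iemNativeSketchForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                             IEMNATIVELABELTYPE enmLabelType, IEMNATIVEFIXUPTYPE enmFixupType)
{
    /* Forward declare the label (offWhere=UINT32_MAX, uData=0). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType, UINT32_MAX, 0);

    /* Record a fixup at the current offset for the branch instruction we emit next. */
    iemNativeAddFixup(pReNative, off, idxLabel, enmFixupType, 0 /*offAddend*/);
    off += 1; /* placeholder: emit the actual branch instruction(s) here */

    /* ... emit the code the branch skips over ... */

    /* Bind the label to the now known target offset. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif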
3642
3643/**
3644 * Slow code path for iemNativeInstrBufEnsure.
3645 */
3646DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3647{
3648 /* Double the buffer size till we meet the request. */
3649 uint32_t cNew = pReNative->cInstrBufAlloc;
3650 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3651 do
3652 cNew *= 2;
3653 while (cNew < off + cInstrReq);
3654
3655 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3656#ifdef RT_ARCH_ARM64
3657 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3658#else
3659 uint32_t const cbMaxInstrBuf = _2M;
3660#endif
3661 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3662
3663 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3664 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3665
3666#ifdef VBOX_STRICT
3667 pReNative->offInstrBufChecked = off + cInstrReq;
3668#endif
3669 pReNative->cInstrBufAlloc = cNew;
3670 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3671}
3672
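/*
 * Illustrative sketch only (kept out of the build): the slow path above is
 * normally reached through an inline iemNativeInstrBufEnsure() wrapper, which
 * is assumed to be declared in the recompiler header and to fall back to
 * iemNativeInstrBufEnsureSlow() when the buffer is too small.  Emitters follow
 * this pattern; the zero written below is just a placeholder encoding.
 */
#if 0
static uint32_t iemNativeSketchEmitOneInstr(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Make sure there is room for one more instruction, growing the buffer if needed. */
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);

    /* Write the instruction and advance the code buffer offset. */
    pCodeBuf[off++] = 0; /* placeholder encoding */
    return off;
}
#endif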
3673#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3674
3675/**
3676 * Grows the static debug info array used during recompilation.
3677 *
3678 * @returns Pointer to the new debug info block; throws VBox status code on
3679 * failure, so no need to check the return value.
3680 */
3681DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3682{
3683 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3684 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3685 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3686 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3687 pReNative->pDbgInfo = pDbgInfo;
3688 pReNative->cDbgInfoAlloc = cNew;
3689 return pDbgInfo;
3690}
3691
3692
3693/**
3694 * Adds a new, uninitialized debug info entry, returning the pointer to it.
3695 */
3696DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3697{
3698 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3699 { /* likely */ }
3700 else
3701 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3702 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3703}
3704
3705
3706/**
3707 * Debug Info: Adds a native offset record, if necessary.
3708 */
3709DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3710{
3711 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3712
3713 /*
3714 * Search backwards to see if we've got a similar record already.
3715 */
3716 uint32_t idx = pDbgInfo->cEntries;
3717 uint32_t idxStop = idx > 16 ? idx - 16 : 0;
3718 while (idx-- > idxStop)
3719 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3720 {
3721 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3722 return;
3723 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3724 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3725 break;
3726 }
3727
3728 /*
3729 * Add it.
3730 */
3731 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3732 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3733 pEntry->NativeOffset.offNative = off;
3734}
3735
3736
3737/**
3738 * Debug Info: Record info about a label.
3739 */
3740static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3741{
3742 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3743 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3744 pEntry->Label.uUnused = 0;
3745 pEntry->Label.enmLabel = (uint8_t)enmType;
3746 pEntry->Label.uData = uData;
3747}
3748
3749
3750/**
3751 * Debug Info: Record info about a threaded call.
3752 */
3753static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3754{
3755 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3756 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3757 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3758 pEntry->ThreadedCall.uUnused = 0;
3759 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3760}
3761
3762
3763/**
3764 * Debug Info: Record info about a new guest instruction.
3765 */
3766static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3767{
3768 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3769 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3770 pEntry->GuestInstruction.uUnused = 0;
3771 pEntry->GuestInstruction.fExec = fExec;
3772}
3773
3774
3775/**
3776 * Debug Info: Record info about guest register shadowing.
3777 */
3778DECL_HIDDEN_THROW(void)
3779iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3780 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3781{
3782 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3783 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3784 pEntry->GuestRegShadowing.uUnused = 0;
3785 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3786 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3787 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3788#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3789 Assert( idxHstReg != UINT8_MAX
3790 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
3791#endif
3792}
3793
3794
3795# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3796/**
3797 * Debug Info: Record info about guest register shadowing.
3798 * Debug Info: Record info about guest SIMD register shadowing.
3799DECL_HIDDEN_THROW(void)
3800iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3801 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3802{
3803 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3804 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3805 pEntry->GuestSimdRegShadowing.uUnused = 0;
3806 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3807 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3808 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3809}
3810# endif
3811
3812
3813# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3814/**
3815 * Debug Info: Record info about delayed RIP updates.
3816 */
3817DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3818{
3819 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3820 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3821 pEntry->DelayedPcUpdate.offPc = offPc;
3822 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3823}
3824# endif
3825
3826# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
3827
3828/**
3829 * Debug Info: Record info about a dirty guest register.
3830 */
3831DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
3832 uint8_t idxGstReg, uint8_t idxHstReg)
3833{
3834 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3835 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
3836 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
3837 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
3838 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
3839}
3840
3841
3842/**
3843 * Debug Info: Record info about a dirty guest register writeback operation.
3844 */
3845DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
3846{
3847 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3848 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
3849 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
3850 pEntry->GuestRegWriteback.fGstReg = (uint32_t)fGstReg;
3851 /** @todo r=aeichner Can't fit the whole register mask in the debug info entry, deal with it when it becomes necessary. */
3852 Assert((uint64_t)pEntry->GuestRegWriteback.fGstReg == fGstReg);
3853}
3854
3855# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
3856
3857#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3858
3859
3860/*********************************************************************************************************************************
3861* Register Allocator *
3862*********************************************************************************************************************************/
3863
3864/**
3865 * Register parameter indexes (indexed by argument number).
3866 */
3867DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3868{
3869 IEMNATIVE_CALL_ARG0_GREG,
3870 IEMNATIVE_CALL_ARG1_GREG,
3871 IEMNATIVE_CALL_ARG2_GREG,
3872 IEMNATIVE_CALL_ARG3_GREG,
3873#if defined(IEMNATIVE_CALL_ARG4_GREG)
3874 IEMNATIVE_CALL_ARG4_GREG,
3875# if defined(IEMNATIVE_CALL_ARG5_GREG)
3876 IEMNATIVE_CALL_ARG5_GREG,
3877# if defined(IEMNATIVE_CALL_ARG6_GREG)
3878 IEMNATIVE_CALL_ARG6_GREG,
3879# if defined(IEMNATIVE_CALL_ARG7_GREG)
3880 IEMNATIVE_CALL_ARG7_GREG,
3881# endif
3882# endif
3883# endif
3884#endif
3885};
3886AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3887
3888/**
3889 * Call register masks indexed by argument count.
3890 */
3891DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3892{
3893 0,
3894 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3895 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3896 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3897 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3898 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3899#if defined(IEMNATIVE_CALL_ARG4_GREG)
3900 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3901 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3902# if defined(IEMNATIVE_CALL_ARG5_GREG)
3903 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3904 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3905# if defined(IEMNATIVE_CALL_ARG6_GREG)
3906 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3907 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3908 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3909# if defined(IEMNATIVE_CALL_ARG7_GREG)
3910 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3911 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3912 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3913# endif
3914# endif
3915# endif
3916#endif
3917};
3918
3919#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3920/**
3921 * BP offset of the stack argument slots.
3922 *
3923 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3924 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3925 */
3926DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3927{
3928 IEMNATIVE_FP_OFF_STACK_ARG0,
3929# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3930 IEMNATIVE_FP_OFF_STACK_ARG1,
3931# endif
3932# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3933 IEMNATIVE_FP_OFF_STACK_ARG2,
3934# endif
3935# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3936 IEMNATIVE_FP_OFF_STACK_ARG3,
3937# endif
3938};
3939AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3940#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3941
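/*
 * Illustrative sketch only (kept out of the build): how the two tables above
 * are meant to be consumed when marshalling arguments for a helper call.  The
 * first IEMNATIVE_CALL_ARG_GREG_COUNT arguments go into the registers listed
 * in g_aidxIemNativeCallRegs; on hosts with IEMNATIVE_FP_OFF_STACK_ARG0 the
 * remainder is stored to the frame slots in g_aoffIemNativeCallStackArgBpDisp.
 * The source register array is a made-up parameter and the caller is assumed
 * to keep cArgs within the register + stack slot budget.
 */
#if 0
static uint32_t iemNativeSketchCopyCallArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                            uint8_t const *paidxSrcRegs, uint8_t cArgs)
{
    for (uint8_t iArg = 0; iArg < cArgs; iArg++)
        if (iArg < IEMNATIVE_CALL_ARG_GREG_COUNT)
            off = iemNativeEmitLoadGprFromGpr(pReNative, off, g_aidxIemNativeCallRegs[iArg], paidxSrcRegs[iArg]);
# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
        else
            off = iemNativeEmitStoreGprByBp(pReNative, off,
                                            g_aoffIemNativeCallStackArgBpDisp[iArg - IEMNATIVE_CALL_ARG_GREG_COUNT],
                                            paidxSrcRegs[iArg]);
# endif
    return off;
}
#endif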
3942/**
3943 * Info about shadowed guest register values.
3944 * @see IEMNATIVEGSTREG
3945 */
3946DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3947{
3948#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3949 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3950 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3951 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3952 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3953 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3954 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3955 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3956 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3957 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3958 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3959 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3960 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3961 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3962 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3963 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3964 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3965 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3966 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3967 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3968 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3969 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3970 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3971 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3972 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3973 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3974 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3975 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3976 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3977 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3978 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3979 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3980 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3981 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3982 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3983 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3984 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3985 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3986 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3987 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3988 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3989 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3990 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3991 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3992 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3993 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3994 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3995 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3996 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3997#undef CPUMCTX_OFF_AND_SIZE
3998};
3999AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
4000
4001
4002/** Host CPU general purpose register names. */
4003DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
4004{
4005#ifdef RT_ARCH_AMD64
4006 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
4007#elif defined(RT_ARCH_ARM64)
4008 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
4009 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
4010#else
4011# error "port me"
4012#endif
4013};
4014
4015
4016#if 0 /* unused */
4017/**
4018 * Tries to locate a suitable register in the given register mask.
4019 *
4020 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4021 * failed.
4022 *
4023 * @returns Host register number on success, returns UINT8_MAX on failure.
4024 */
4025static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
4026{
4027 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4028 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4029 if (fRegs)
4030 {
4031 /** @todo pick better here: */
4032 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
4033
4034 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4035 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4036 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4037 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4038
4039 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4040 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4041 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4042 return idxReg;
4043 }
4044 return UINT8_MAX;
4045}
4046#endif /* unused */
4047
4048
4049#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4050/**
4051 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
4052 *
4053 * @returns New code buffer offset on success, UINT32_MAX on failure.
4054 * @param   pReNative   The native recompile state.
4055 * @param off The current code buffer position.
4056 * @param enmGstReg The guest register to store to.
4057 * @param idxHstReg The host register to store from.
4058 */
4059DECL_FORCE_INLINE_THROW(uint32_t)
4060iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
4061{
4062 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4063 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
4064
4065 switch (g_aGstShadowInfo[enmGstReg].cb)
4066 {
4067 case sizeof(uint64_t):
4068 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4069 case sizeof(uint32_t):
4070 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4071 case sizeof(uint16_t):
4072 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4073#if 0 /* not present in the table. */
4074 case sizeof(uint8_t):
4075 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
4076#endif
4077 default:
4078 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
4079 }
4080}
4081
4082
4083/**
4084 * Emits code to flush a pending write of the given guest register if any.
4085 *
4086 * @returns New code buffer offset.
4087 * @param pReNative The native recompile state.
4088 * @param off Current code buffer position.
4089 * @param enmGstReg The guest register to flush.
4090 */
4091DECL_HIDDEN_THROW(uint32_t)
4092iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
4093{
4094 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4095
4096 Assert(enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast);
4097 Assert( idxHstReg != UINT8_MAX
4098 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
4099 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s\n",
4100 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
4101
4102 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
4103
4104 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
4105 return off;
4106}
4107
4108
4109/**
4110 * Flush the given set of guest registers if marked as dirty.
4111 *
4112 * @returns New code buffer offset.
4113 * @param pReNative The native recompile state.
4114 * @param off Current code buffer position.
4115 * @param fFlushGstReg The guest register set to flush (default is flush everything).
4116 */
4117DECL_HIDDEN_THROW(uint32_t)
4118iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
4119{
4120 if (pReNative->Core.bmGstRegShadowDirty & fFlushGstReg)
4121 {
4122# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4123 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4124 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fFlushGstReg);
4125# endif
4126
4127 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
4128 uint32_t idxGstReg = 0;
4129
4130 do
4131 {
4132 if (bmGstRegShadowDirty & 0x1)
4133 {
4134 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
4135 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4136 }
4137 idxGstReg++;
4138 bmGstRegShadowDirty >>= 1;
4139 } while (bmGstRegShadowDirty);
4140 }
4141
4142 return off;
4143}
4144
4145
4146/**
4147 * Flush all shadowed guest registers marked as dirty for the given host register.
4148 *
4149 * @returns New code buffer offset.
4150 * @param pReNative The native recompile state.
4151 * @param off Current code buffer position.
4152 * @param idxHstReg The host register.
4153 *
4154 * @note This doesn't do any unshadowing of guest registers from the host register.
4155 */
4156DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
4157{
4158 /* We need to flush any pending guest register writes this host register shadows. */
4159 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4160 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
4161 {
4162# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4163 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4164 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
4165# endif
4166
4167 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
4168 uint32_t idxGstReg = 0;
4169 do
4170 {
4171 if (bmGstRegShadowDirty & 0x1)
4172 {
4173 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
4174 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4175 }
4176 idxGstReg++;
4177 bmGstRegShadowDirty >>= 1;
4178 } while (bmGstRegShadowDirty);
4179 }
4180
4181 return off;
4182}
4183#endif
4184
4185
4186/**
4187 * Locate a register, possibly freeing one up.
4188 *
4189 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4190 * failed.
4191 *
4192 * @returns Host register number on success. Returns UINT8_MAX if no registers
4193 *          were found; the caller is supposed to deal with this and raise an
4194 * allocation type specific status code (if desired).
4195 *
4196 * @throws  VBox status code if we run into trouble spilling a variable or
4197 * recording debug info. Does NOT throw anything if we're out of
4198 * registers, though.
4199 */
4200static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4201 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
4202{
4203 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4204 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4205 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4206
4207 /*
4208 * Try a freed register that's shadowing a guest register.
4209 */
4210 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4211 if (fRegs)
4212 {
4213 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4214
4215#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4216 /*
4217         * When we have liveness information, we use it to kick out all shadowed
4218         * guest registers that will not be needed any more in this TB.  If we're
4219 * lucky, this may prevent us from ending up here again.
4220 *
4221 * Note! We must consider the previous entry here so we don't free
4222 * anything that the current threaded function requires (current
4223 * entry is produced by the next threaded function).
4224 */
4225 uint32_t const idxCurCall = pReNative->idxCurCall;
4226 if (idxCurCall > 0)
4227 {
4228 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4229
4230# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4231 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4232 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4233            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
4234# else
4235 /* Construct a mask of the registers not in the read or write state.
4236               Note! We could skip writes, if they aren't from us, as this is just
4237 a hack to prevent trashing registers that have just been written
4238 or will be written when we retire the current instruction. */
4239 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4240 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4241 & IEMLIVENESSBIT_MASK;
4242# endif
4243 /* Merge EFLAGS. */
4244 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
4245 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
4246 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
4247 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
4248 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
4249
4250 /* If it matches any shadowed registers. */
4251 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4252 {
4253#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4254 /* Writeback any dirty shadow registers we are about to unshadow. */
4255 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
4256#endif
4257
4258 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
4259 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4260 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4261
4262 /* See if we've got any unshadowed registers we can return now. */
4263 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4264 if (fUnshadowedRegs)
4265 {
4266 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4267 return (fPreferVolatile
4268 ? ASMBitFirstSetU32(fUnshadowedRegs)
4269 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4270 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4271 - 1;
4272 }
4273 }
4274 }
4275#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4276
4277 unsigned const idxReg = (fPreferVolatile
4278 ? ASMBitFirstSetU32(fRegs)
4279 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4280 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4281 - 1;
4282
4283 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4284 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4285 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4286 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4287
4288#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4289 /* We need to flush any pending guest register writes this host register shadows. */
4290 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
4291#endif
4292
4293 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4294 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4295 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4296 return idxReg;
4297 }
4298
4299 /*
4300 * Try free up a variable that's in a register.
4301 *
4302 * We do two rounds here, first evacuating variables we don't need to be
4303 * saved on the stack, then in the second round move things to the stack.
4304 */
4305 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4306 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4307 {
4308 uint32_t fVars = pReNative->Core.bmVars;
4309 while (fVars)
4310 {
4311 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4312 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4313#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4314 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
4315                { fVars &= ~RT_BIT_32(idxVar); continue; } /* must clear the bit or the loop never advances */
4316#endif
4317
4318 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4319 && (RT_BIT_32(idxReg) & fRegMask)
4320 && ( iLoop == 0
4321 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4322 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4323 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4324 {
4325 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4326 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4327 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4328 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4329 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4330 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4331#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4332 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4333#endif
4334
4335 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4336 {
4337 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4338 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4339 }
4340
4341 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4342 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4343
4344 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4345 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4346 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4347 return idxReg;
4348 }
4349 fVars &= ~RT_BIT_32(idxVar);
4350 }
4351 }
4352
4353 return UINT8_MAX;
4354}
4355
4356
4357/**
4358 * Reassigns a variable to a different register specified by the caller.
4359 *
4360 * @returns The new code buffer position.
4361 * @param pReNative The native recompile state.
4362 * @param off The current code buffer position.
4363 * @param idxVar The variable index.
4364 * @param idxRegOld The old host register number.
4365 * @param idxRegNew The new host register number.
4366 * @param pszCaller The caller for logging.
4367 */
4368static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4369 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4370{
4371 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4372 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4373#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4374 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4375#endif
4376 RT_NOREF(pszCaller);
4377
4378#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4379 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4380#endif
4381 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4382
4383 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4384#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4385 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4386#endif
4387 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4388 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4389 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4390
4391 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4392 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4393 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4394 if (fGstRegShadows)
4395 {
4396 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4397 | RT_BIT_32(idxRegNew);
4398 while (fGstRegShadows)
4399 {
4400 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4401 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4402
4403 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4404 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4405 }
4406 }
4407
4408 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4409 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4410 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4411 return off;
4412}
4413
4414
4415/**
4416 * Moves a variable to a different register or spills it onto the stack.
4417 *
4418 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4419 * kinds can easily be recreated if needed later.
4420 *
4421 * @returns The new code buffer position.
4422 * @param pReNative The native recompile state.
4423 * @param off The current code buffer position.
4424 * @param idxVar The variable index.
4425 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4426 * call-volatile registers.
4427 */
4428DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4429 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4430{
4431 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4432 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4433 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4434 Assert(!pVar->fRegAcquired);
4435
4436 uint8_t const idxRegOld = pVar->idxReg;
4437 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4438 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4439 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4440 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4441 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4442 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4443 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4444 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4445#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4446 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4447#endif
4448
4449
4450 /** @todo Add statistics on this.*/
4451 /** @todo Implement basic variable liveness analysis (python) so variables
4452 *        can be freed immediately once no longer used. Otherwise we risk tying up
4453 *        registers and stack slots for dead variables.
4454 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4455
4456 /*
4457 * First try move it to a different register, as that's cheaper.
4458 */
4459 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4460 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4461 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4462 if (fRegs)
4463 {
4464 /* Avoid using shadow registers, if possible. */
4465 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4466 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4467 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4468 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4469 }
4470
4471 /*
4472 * Otherwise we must spill the register onto the stack.
4473 */
4474 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4475 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4476 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4477 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4478
4479 pVar->idxReg = UINT8_MAX;
4480 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4481 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4482 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4483 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4484 return off;
4485}
4486
4487
4488/**
4489 * Allocates a temporary host general purpose register.
4490 *
4491 * This may emit code to save register content onto the stack in order to free
4492 * up a register.
4493 *
4494 * @returns The host register number; throws VBox status code on failure,
4495 * so no need to check the return value.
4496 * @param pReNative The native recompile state.
4497 * @param poff Pointer to the variable with the code buffer position.
4498  *                          This will be updated if we need to move a variable from
4499 * register to stack in order to satisfy the request.
4500 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4501 * registers (@c true, default) or the other way around
4502 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4503 */
4504DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4505{
4506 /*
4507 * Try find a completely unused register, preferably a call-volatile one.
4508 */
4509 uint8_t idxReg;
4510 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4511 & ~pReNative->Core.bmHstRegsWithGstShadow
4512 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4513 if (fRegs)
4514 {
4515 if (fPreferVolatile)
4516 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4517 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4518 else
4519 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4520 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4521 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4522 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4523 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4524 }
4525 else
4526 {
4527 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4528 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4529 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4530 }
4531 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4532}
4533
4534
4535/**
4536 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4537 * registers.
4538 *
4539 * @returns The host register number; throws VBox status code on failure,
4540 * so no need to check the return value.
4541 * @param pReNative The native recompile state.
4542 * @param poff Pointer to the variable with the code buffer position.
4543  *                          This will be updated if we need to move a variable from
4544 * register to stack in order to satisfy the request.
4545 * @param fRegMask Mask of acceptable registers.
4546 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4547 * registers (@c true, default) or the other way around
4548 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4549 */
4550DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4551 bool fPreferVolatile /*= true*/)
4552{
4553 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4554 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4555
4556 /*
4557 * Try find a completely unused register, preferably a call-volatile one.
4558 */
4559 uint8_t idxReg;
4560 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4561 & ~pReNative->Core.bmHstRegsWithGstShadow
4562 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4563 & fRegMask;
4564 if (fRegs)
4565 {
4566 if (fPreferVolatile)
4567 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4568 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4569 else
4570 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4571 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4572 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4573 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4574 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4575 }
4576 else
4577 {
4578 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4579 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4580 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4581 }
4582 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4583}
4584
4585
4586/**
4587 * Allocates a temporary register for loading an immediate value into.
4588 *
4589 * This will emit code to load the immediate, unless there happens to be an
4590 * unused register with the value already loaded.
4591 *
4592 * The caller will not modify the returned register, it must be considered
4593 * read-only. Free using iemNativeRegFreeTmpImm.
4594 *
4595 * @returns The host register number; throws VBox status code on failure, so no
4596 * need to check the return value.
4597 * @param pReNative The native recompile state.
4598 * @param poff Pointer to the variable with the code buffer position.
4599 * @param uImm The immediate value that the register must hold upon
4600 * return.
4601 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4602 * registers (@c true, default) or the other way around
4603 * (@c false).
4604 *
4605 * @note Reusing immediate values has not been implemented yet.
4606 */
4607DECL_HIDDEN_THROW(uint8_t)
4608iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4609{
4610 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4611 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4612 return idxReg;
4613}
4614
4615
4616/**
4617 * Allocates a temporary host general purpose register for keeping a guest
4618 * register value.
4619 *
4620 * Since we may already have a register holding the guest register value,
4621 * code will be emitted to do the loading if that's not the case. Code may also
4622  * be emitted if we have to free up a register to satisfy the request.
4623 *
4624 * @returns The host register number; throws VBox status code on failure, so no
4625 * need to check the return value.
4626 * @param pReNative The native recompile state.
4627 * @param poff Pointer to the variable with the code buffer
4628  *                          position. This will be updated if we need to move a
4629 * variable from register to stack in order to satisfy
4630 * the request.
4631  * @param   enmGstReg       The guest register that is to be updated.
4632 * @param enmIntendedUse How the caller will be using the host register.
4633 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4634 * register is okay (default). The ASSUMPTION here is
4635 * that the caller has already flushed all volatile
4636 * registers, so this is only applied if we allocate a
4637 * new register.
4638 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4639 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4640 */
4641DECL_HIDDEN_THROW(uint8_t)
4642iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4643 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4644 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4645{
4646 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4647#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4648 AssertMsg( fSkipLivenessAssert
4649 || pReNative->idxCurCall == 0
4650 || enmGstReg == kIemNativeGstReg_Pc
4651 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4652 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4653 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4654 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4655 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4656 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4657#endif
4658 RT_NOREF(fSkipLivenessAssert);
4659#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4660 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4661#endif
4662 uint32_t const fRegMask = !fNoVolatileRegs
4663 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4664 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4665
4666 /*
4667 * First check if the guest register value is already in a host register.
4668 */
4669 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4670 {
4671 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4672 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4673 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4674 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4675
4676 /* It's not supposed to be allocated... */
4677 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4678 {
4679 /*
4680 * If the register will trash the guest shadow copy, try find a
4681 * completely unused register we can use instead. If that fails,
4682 * we need to disassociate the host reg from the guest reg.
4683 */
4684 /** @todo would be nice to know if preserving the register is in any way helpful. */
4685 /* If the purpose is calculations, try duplicate the register value as
4686 we'll be clobbering the shadow. */
4687 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4688 && ( ~pReNative->Core.bmHstRegs
4689 & ~pReNative->Core.bmHstRegsWithGstShadow
4690 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4691 {
4692 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4693
4694 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4695
4696 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4697 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4698 g_apszIemNativeHstRegNames[idxRegNew]));
4699 idxReg = idxRegNew;
4700 }
4701 /* If the current register matches the restrictions, go ahead and allocate
4702 it for the caller. */
4703 else if (fRegMask & RT_BIT_32(idxReg))
4704 {
4705 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4706 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4707 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4708 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4709 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4710 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4711 else
4712 {
4713 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4714 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4715 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4716 }
4717 }
4718 /* Otherwise, allocate a register that satisfies the caller and transfer
4719 the shadowing if compatible with the intended use. (This basically
4720 means the call wants a non-volatile register (RSP push/pop scenario).) */
4721 else
4722 {
4723 Assert(fNoVolatileRegs);
4724 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4725 !fNoVolatileRegs
4726 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4727 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4728 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4729 {
4730 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4731                     Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4732 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4733 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4734 }
4735 else
4736 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4737 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4738 g_apszIemNativeHstRegNames[idxRegNew]));
4739 idxReg = idxRegNew;
4740 }
4741 }
4742 else
4743 {
4744 /*
4745 * Oops. Shadowed guest register already allocated!
4746 *
4747 * Allocate a new register, copy the value and, if updating, the
4748 * guest shadow copy assignment to the new register.
4749 */
4750 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4751 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4752 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4753 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4754
4755 /** @todo share register for readonly access. */
4756 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4757 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4758
4759 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4760 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4761
4762 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4763 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4764 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4765 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4766 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4767 else
4768 {
4769 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4770 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4771 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4772 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4773 }
4774 idxReg = idxRegNew;
4775 }
4776 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4777
4778#ifdef VBOX_STRICT
4779 /* Strict builds: Check that the value is correct. */
4780 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4781#endif
4782
4783#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4784 /** @todo r=aeichner Implement for registers other than GPR as well. */
4785 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4786 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
4787 && enmGstReg >= kIemNativeGstReg_GprFirst
4788 && enmGstReg <= kIemNativeGstReg_GprLast
4789 )
4790 {
4791# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4792 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
4793 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
4794# endif
4795
4796 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
4797 }
4798#endif
4799
4800 return idxReg;
4801 }
4802
4803 /*
4804      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4805 */
4806 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4807
4808 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4809 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4810
4811 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4812 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4813 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4814 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4815
4816#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4817 /** @todo r=aeichner Implement for registers other than GPR as well. */
4818 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4819 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
4820 && enmGstReg >= kIemNativeGstReg_GprFirst
4821 && enmGstReg <= kIemNativeGstReg_GprLast
4822 )
4823 {
4824# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4825 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
4826 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
4827# endif
4828
4829 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
4830 }
4831#endif
4832
4833 return idxRegNew;
4834}
4835
4836
4837/**
4838 * Allocates a temporary host general purpose register that already holds the
4839 * given guest register value.
4840 *
4841 * The use case for this function is places where the shadowing state cannot be
4842  * modified due to branching and such. This will fail if we don't have a
4843 * current shadow copy handy or if it's incompatible. The only code that will
4844 * be emitted here is value checking code in strict builds.
4845 *
4846 * The intended use can only be readonly!
4847 *
4848 * @returns The host register number, UINT8_MAX if not present.
4849 * @param pReNative The native recompile state.
4850 * @param poff Pointer to the instruction buffer offset.
4851 * Will be updated in strict builds if a register is
4852 * found.
4853  * @param   enmGstReg   The guest register that is to be updated.
4854 * @note In strict builds, this may throw instruction buffer growth failures.
4855 * Non-strict builds will not throw anything.
4856 * @sa iemNativeRegAllocTmpForGuestReg
4857 */
4858DECL_HIDDEN_THROW(uint8_t)
4859iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4860{
4861 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4862#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4863 AssertMsg( pReNative->idxCurCall == 0
4864 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4865 || enmGstReg == kIemNativeGstReg_Pc,
4866 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4867#endif
4868
4869 /*
4870 * First check if the guest register value is already in a host register.
4871 */
4872 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4873 {
4874 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4875 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4876 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4877 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4878
4879 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4880 {
4881 /*
4882 * We only do readonly use here, so easy compared to the other
4883 * variant of this code.
4884 */
4885 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4886 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4887 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4888 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4889 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4890
4891#ifdef VBOX_STRICT
4892 /* Strict builds: Check that the value is correct. */
4893 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4894#else
4895 RT_NOREF(poff);
4896#endif
4897 return idxReg;
4898 }
4899 }
4900
4901 return UINT8_MAX;
4902}
4903
4904
4905/**
4906 * Allocates argument registers for a function call.
4907 *
4908 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4909 * need to check the return value.
4910 * @param pReNative The native recompile state.
4911 * @param off The current code buffer offset.
4912 * @param cArgs The number of arguments the function call takes.
4913 */
4914DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4915{
4916 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4917 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4918 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4919 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4920
4921 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4922 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4923 else if (cArgs == 0)
4924 return true;
4925
4926 /*
4927      * Do we get lucky and all registers are free and not shadowing anything?
4928 */
4929 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4930 for (uint32_t i = 0; i < cArgs; i++)
4931 {
4932 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4933 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4934 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4935 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4936 }
4937 /*
4938 * Okay, not lucky so we have to free up the registers.
4939 */
4940 else
4941 for (uint32_t i = 0; i < cArgs; i++)
4942 {
4943 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4944 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4945 {
4946 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4947 {
4948 case kIemNativeWhat_Var:
4949 {
4950 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4951 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4952 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4953 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4954 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4955#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4956 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4957#endif
4958
4959 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4960 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4961 else
4962 {
4963 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4964 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4965 }
4966 break;
4967 }
4968
4969 case kIemNativeWhat_Tmp:
4970 case kIemNativeWhat_Arg:
4971 case kIemNativeWhat_rc:
4972 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4973 default:
4974 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4975 }
4976
4977 }
4978 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4979 {
4980 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4981 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4982 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4983#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4984 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4985#endif
4986 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4987 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4988 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4989 }
4990 else
4991 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4992 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4993 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4994 }
4995 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4996 return true;
4997}
4998
4999
5000DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
5001
5002
5003#if 0
5004/**
5005 * Frees a register assignment of any type.
5006 *
5007 * @param pReNative The native recompile state.
5008 * @param idxHstReg The register to free.
5009 *
5010 * @note Does not update variables.
5011 */
5012DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
5013{
5014 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5015 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
5016 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
5017 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
5018 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
5019 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
5020 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
5021 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
5022 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
5023 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
5024 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5025 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5026 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
5027 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5028
5029 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5030 /* no flushing, right:
5031 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5032 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5033 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5034 */
5035}
5036#endif
5037
5038
5039/**
5040 * Frees a temporary register.
5041 *
5042 * Any shadow copies of guest registers assigned to the host register will not
5043 * be flushed by this operation.
5044 */
5045DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
5046{
5047 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
5048 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
5049 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5050 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
5051 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5052}
5053
5054
5055/**
5056 * Frees a temporary immediate register.
5057 *
5058  * It is assumed that the caller has not modified the register, so it still holds
5059 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
5060 */
5061DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
5062{
5063 iemNativeRegFreeTmp(pReNative, idxHstReg);
5064}
5065
5066
5067/**
5068 * Frees a register assigned to a variable.
5069 *
5070 * The register will be disassociated from the variable.
5071 */
5072DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
5073{
5074 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
5075 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
5076 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
5077 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5078 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
5079#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5080 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5081#endif
5082
5083 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
5084 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
5085 if (!fFlushShadows)
5086 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
5087 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
5088 else
5089 {
5090 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5091 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5092#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5093 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
5094#endif
5095 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5096 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
5097 uint64_t fGstRegShadows = fGstRegShadowsOld;
5098 while (fGstRegShadows)
5099 {
5100 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5101 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5102
5103 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
5104 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
5105 }
5106 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
5107 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
5108 }
5109}
5110
5111
5112#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5113# ifdef LOG_ENABLED
5114/** Host CPU SIMD register names. */
5115DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5116{
5117# ifdef RT_ARCH_AMD64
5118 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5119 # elif defined(RT_ARCH_ARM64)
5120 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5121 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5122# else
5123# error "port me"
5124# endif
5125};
5126# endif
5127
5128
5129/**
5130 * Frees a SIMD register assigned to a variable.
5131 *
5132 * The register will be disassociated from the variable.
5133 */
5134DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
5135{
5136 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
5137 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
5138 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
5139 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5140 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
5141 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5142
5143 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
5144 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
5145 if (!fFlushShadows)
5146 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
5147 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
5148 else
5149 {
5150 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5151 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
5152 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5153 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
5154 uint64_t fGstRegShadows = fGstRegShadowsOld;
5155 while (fGstRegShadows)
5156 {
5157 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5158 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5159
5160 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
5161 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
5162 }
5163 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
5164 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
5165 }
5166}
5167
5168
5169/**
5170 * Reassigns a variable to a different SIMD register specified by the caller.
5171 *
5172 * @returns The new code buffer position.
5173 * @param pReNative The native recompile state.
5174 * @param off The current code buffer position.
5175 * @param idxVar The variable index.
5176 * @param idxRegOld The old host register number.
5177 * @param idxRegNew The new host register number.
5178 * @param pszCaller The caller for logging.
5179 */
5180static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
5181 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
5182{
5183 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5184 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
5185 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5186 RT_NOREF(pszCaller);
5187
5188 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5189 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
5190 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
5191
5192 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
5193 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5194 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5195
5196 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
5197 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
5199
5200 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
5201 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
5202 else
5203 {
5204 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
5205 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
5206 }
5207
5208 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
5209 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
5210 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
5211 if (fGstRegShadows)
5212 {
5213 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
5214 | RT_BIT_32(idxRegNew);
5215 while (fGstRegShadows)
5216 {
5217 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5218 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5219
5220 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
5221 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
5222 }
5223 }
5224
5225 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
5226 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
5227 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
5228 return off;
5229}
5230
5231
5232/**
5233 * Moves a variable to a different register or spills it onto the stack.
5234 *
5235 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
5236 * kinds can easily be recreated if needed later.
5237 *
5238 * @returns The new code buffer position.
5239 * @param pReNative The native recompile state.
5240 * @param off The current code buffer position.
5241 * @param idxVar The variable index.
5242 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
5243 * call-volatile registers.
5244 */
5245DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
5246 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
5247{
5248 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5249 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5250 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
5251 Assert(!pVar->fRegAcquired);
5252 Assert(!pVar->fSimdReg);
5253
5254 uint8_t const idxRegOld = pVar->idxReg;
5255 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5256 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
5257 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
5258 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
5259 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
5260 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5261 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
5262 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5263 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5264 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5265
5266 /** @todo Add statistics on this.*/
5267 /** @todo Implement basic variable liveness analysis (python) so variables
5268      * can be freed immediately once no longer used.  Without it we risk
5269      * trashing registers and stack for dead variables.
5270 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
5271
5272 /*
5273 * First try move it to a different register, as that's cheaper.
5274 */
5275 fForbiddenRegs |= RT_BIT_32(idxRegOld);
5276 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
5277 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
5278 if (fRegs)
5279 {
5280 /* Avoid using shadow registers, if possible. */
5281 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
5282 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
5283 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
5284 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
5285 }
5286
5287 /*
5288 * Otherwise we must spill the register onto the stack.
5289 */
5290 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
5291 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
5292 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
5293
5294 if (pVar->cbVar == sizeof(RTUINT128U))
5295 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
5296 else
5297 {
5298 Assert(pVar->cbVar == sizeof(RTUINT256U));
5299 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
5300 }
5301
5302 pVar->idxReg = UINT8_MAX;
5303 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
5304 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
5305 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
5306 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
5307 return off;
5308}
5309
5310
5311/**
5312 * Called right before emitting a call instruction to move anything important
5313 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
5314 * optionally freeing argument variables.
5315 *
5316 * @returns New code buffer offset, UINT32_MAX on failure.
5317 * @param pReNative The native recompile state.
5318 * @param off The code buffer offset.
5319 * @param cArgs The number of arguments the function call takes.
5320 * It is presumed that the host register part of these have
5321  *                          It is presumed that the host register part of these has
5322  *                          already been allocated as such and won't need moving,
5323 * @param fKeepVars Mask of variables that should keep their register
5324 * assignments. Caller must take care to handle these.
5325 */
5326DECL_HIDDEN_THROW(uint32_t)
5327iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
5328{
5329 Assert(!cArgs); RT_NOREF(cArgs);
5330
5331 /* fKeepVars will reduce this mask. */
5332 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5333
5334 /*
5335 * Move anything important out of volatile registers.
5336 */
5337 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5338#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
5339 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
5340#endif
5341 ;
5342
5343 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
5344 if (!fSimdRegsToMove)
5345 { /* likely */ }
5346 else
5347 {
5348 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
5349 while (fSimdRegsToMove != 0)
5350 {
5351 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
5352 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
5353
5354 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
5355 {
5356 case kIemNativeWhat_Var:
5357 {
5358                     uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
5359 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5360 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5361 Assert(pVar->idxReg == idxSimdReg);
5362 Assert(pVar->fSimdReg);
5363 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
5364 {
5365 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
5366 idxVar, pVar->enmKind, pVar->idxReg));
5367 if (pVar->enmKind != kIemNativeVarKind_Stack)
5368 pVar->idxReg = UINT8_MAX;
5369 else
5370 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
5371 }
5372 else
5373 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
5374 continue;
5375 }
5376
5377 case kIemNativeWhat_Arg:
5378 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
5379 continue;
5380
5381 case kIemNativeWhat_rc:
5382 case kIemNativeWhat_Tmp:
5383 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
5384 continue;
5385
5386 case kIemNativeWhat_FixedReserved:
5387#ifdef RT_ARCH_ARM64
5388 continue; /* On ARM the upper half of the virtual 256-bit register. */
5389#endif
5390
5391 case kIemNativeWhat_FixedTmp:
5392 case kIemNativeWhat_pVCpuFixed:
5393 case kIemNativeWhat_pCtxFixed:
5394 case kIemNativeWhat_PcShadow:
5395 case kIemNativeWhat_Invalid:
5396 case kIemNativeWhat_End:
5397 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5398 }
5399 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5400 }
5401 }
5402
5403 /*
5404 * Do the actual freeing.
5405 */
5406 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
5407 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
5408 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
5409 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
5410
5411 /* If there are guest register shadows in any call-volatile register, we
5412        have to clear the corresponding guest register masks for each register. */
5413 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
5414 if (fHstSimdRegsWithGstShadow)
5415 {
5416 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5417 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
5418 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
5419 do
5420 {
5421 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
5422 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
5423
5424 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
5425
5426#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5427 /*
5428 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
5429 * to call volatile registers).
5430 */
5431 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5432 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
5433 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
5434#endif
5435 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5436 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
5437
5438 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
5439 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5440 } while (fHstSimdRegsWithGstShadow != 0);
5441 }
5442
5443 return off;
5444}
5445#endif
5446
5447
5448/**
5449 * Called right before emitting a call instruction to move anything important
5450 * out of call-volatile registers, free and flush the call-volatile registers,
5451 * optionally freeing argument variables.
5452 *
5453 * @returns New code buffer offset, UINT32_MAX on failure.
5454 * @param pReNative The native recompile state.
5455 * @param off The code buffer offset.
5456 * @param cArgs The number of arguments the function call takes.
5457  *                          It is presumed that the host register part of these has
5458  *                          already been allocated as such and won't need moving,
5459 * just freeing.
5460 * @param fKeepVars Mask of variables that should keep their register
5461 * assignments. Caller must take care to handle these.
5462 */
5463DECL_HIDDEN_THROW(uint32_t)
5464iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
5465{
5466 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5467
5468 /* fKeepVars will reduce this mask. */
5469 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5470
5471 /*
5472 * Move anything important out of volatile registers.
5473 */
5474 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
5475 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
5476 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
5477#ifdef IEMNATIVE_REG_FIXED_TMP0
5478 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
5479#endif
5480#ifdef IEMNATIVE_REG_FIXED_TMP1
5481 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
5482#endif
5483#ifdef IEMNATIVE_REG_FIXED_PC_DBG
5484 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
5485#endif
5486 & ~g_afIemNativeCallRegs[cArgs];
5487
5488 fRegsToMove &= pReNative->Core.bmHstRegs;
5489 if (!fRegsToMove)
5490 { /* likely */ }
5491 else
5492 {
5493 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
5494 while (fRegsToMove != 0)
5495 {
5496 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
5497 fRegsToMove &= ~RT_BIT_32(idxReg);
5498
5499 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
5500 {
5501 case kIemNativeWhat_Var:
5502 {
5503 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
5504 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5505 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5506 Assert(pVar->idxReg == idxReg);
5507#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5508 Assert(!pVar->fSimdReg);
5509#endif
5510 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
5511 {
5512 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
5513 idxVar, pVar->enmKind, pVar->idxReg));
5514 if (pVar->enmKind != kIemNativeVarKind_Stack)
5515 pVar->idxReg = UINT8_MAX;
5516 else
5517 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5518 }
5519 else
5520 fRegsToFree &= ~RT_BIT_32(idxReg);
5521 continue;
5522 }
5523
5524 case kIemNativeWhat_Arg:
5525 AssertMsgFailed(("What?!?: %u\n", idxReg));
5526 continue;
5527
5528 case kIemNativeWhat_rc:
5529 case kIemNativeWhat_Tmp:
5530 AssertMsgFailed(("Missing free: %u\n", idxReg));
5531 continue;
5532
5533 case kIemNativeWhat_FixedTmp:
5534 case kIemNativeWhat_pVCpuFixed:
5535 case kIemNativeWhat_pCtxFixed:
5536 case kIemNativeWhat_PcShadow:
5537 case kIemNativeWhat_FixedReserved:
5538 case kIemNativeWhat_Invalid:
5539 case kIemNativeWhat_End:
5540 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5541 }
5542 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5543 }
5544 }
5545
5546 /*
5547 * Do the actual freeing.
5548 */
5549 if (pReNative->Core.bmHstRegs & fRegsToFree)
5550 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
5551 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
5552 pReNative->Core.bmHstRegs &= ~fRegsToFree;
5553
5554 /* If there are guest register shadows in any call-volatile register, we
5555        have to clear the corresponding guest register masks for each register. */
5556 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
5557 if (fHstRegsWithGstShadow)
5558 {
5559 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5560 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
5561 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
5562 do
5563 {
5564 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
5565 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5566
5567 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
5568
5569#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5570 /*
5571 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
5572 * to call volatile registers).
5573 */
5574 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
5575 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
5576 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
5577#endif
5578
5579 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
5580 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
5581 } while (fHstRegsWithGstShadow != 0);
5582 }
5583
5584#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5585 /* Now for the SIMD registers, no argument support for now. */
5586 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
5587#endif
5588
5589 return off;
5590}
5591
5592
5593/**
5594 * Flushes a set of guest register shadow copies.
5595 *
5596 * This is usually done after calling a threaded function or a C-implementation
5597 * of an instruction.
5598 *
5599 * @param pReNative The native recompile state.
5600 * @param fGstRegs Set of guest registers to flush.
5601 */
5602DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
5603{
5604 /*
5605 * Reduce the mask by what's currently shadowed
5606 */
5607 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
5608 fGstRegs &= bmGstRegShadowsOld;
5609 if (fGstRegs)
5610 {
5611 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
5612 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
5613 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
5614 if (bmGstRegShadowsNew)
5615 {
5616 /*
5617 * Partial.
5618 */
5619 do
5620 {
5621 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5622 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5623 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5624 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5625 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5626#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5627 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
5628#endif
5629
5630 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5631 fGstRegs &= ~fInThisHstReg;
5632 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5633 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5634 if (!fGstRegShadowsNew)
5635 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5636 } while (fGstRegs != 0);
5637 }
5638 else
5639 {
5640 /*
5641 * Clear all.
5642 */
5643 do
5644 {
5645 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5646 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5647 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5648 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5649 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5650#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5651 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
5652#endif
5653
5654 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5655 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5656 } while (fGstRegs != 0);
5657 pReNative->Core.bmHstRegsWithGstShadow = 0;
5658 }
5659 }
5660}
5661
5662
5663/**
5664 * Flushes guest register shadow copies held by a set of host registers.
5665 *
5666 * This is used with the TLB lookup code for ensuring that we don't carry on
5667 * with any guest shadows in volatile registers, as these will get corrupted by
5668 * a TLB miss.
5669 *
5670 * @param pReNative The native recompile state.
5671 * @param fHstRegs Set of host registers to flush guest shadows for.
5672 */
5673DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5674{
5675 /*
5676 * Reduce the mask by what's currently shadowed.
5677 */
5678 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5679 fHstRegs &= bmHstRegsWithGstShadowOld;
5680 if (fHstRegs)
5681 {
5682 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5683 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5684 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5685 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5686 if (bmHstRegsWithGstShadowNew)
5687 {
5688 /*
5689 * Partial (likely).
5690 */
5691 uint64_t fGstShadows = 0;
5692 do
5693 {
5694 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5695 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5696 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5697 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5698#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5699 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5700#endif
5701
5702 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5703 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5704 fHstRegs &= ~RT_BIT_32(idxHstReg);
5705 } while (fHstRegs != 0);
5706 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5707 }
5708 else
5709 {
5710 /*
5711 * Clear all.
5712 */
5713 do
5714 {
5715 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5716 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5717 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5718 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5719#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5720 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5721#endif
5722
5723 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5724 fHstRegs &= ~RT_BIT_32(idxHstReg);
5725 } while (fHstRegs != 0);
5726 pReNative->Core.bmGstRegShadows = 0;
5727 }
5728 }
5729}
5730
5731
5732/**
5733 * Restores guest shadow copies in volatile registers.
5734 *
5735 * This is used after calling a helper function (think TLB miss) to restore the
5736 * register state of volatile registers.
5737 *
5738 * @param pReNative The native recompile state.
5739 * @param off The code buffer offset.
5740 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5741 * be active (allocated) w/o asserting. Hack.
5742 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5743 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5744 */
5745DECL_HIDDEN_THROW(uint32_t)
5746iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5747{
5748 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5749 if (fHstRegs)
5750 {
5751 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5752 do
5753 {
5754 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5755
5756             /* It's not fatal if a register is active holding a variable that is
5757                shadowing a guest register, ASSUMING all pending guest register
5758                writes were flushed prior to the helper call. However, we'll be
5759                emitting duplicate restores, so it wastes code space. */
5760 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5761 RT_NOREF(fHstRegsActiveShadows);
5762
5763 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5764#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5765 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
5766#endif
5767 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5768 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5769 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5770
5771 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5772 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5773
5774 fHstRegs &= ~RT_BIT_32(idxHstReg);
5775 } while (fHstRegs != 0);
5776 }
5777 return off;
5778}
5779
5780
5781
5782
5783/*********************************************************************************************************************************
5784* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5785*********************************************************************************************************************************/
5786#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5787
5788/**
5789 * Info about shadowed guest SIMD register values.
5790 * @see IEMNATIVEGSTSIMDREG
5791 */
5792static struct
5793{
5794 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5795 uint32_t offXmm;
5796 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5797 uint32_t offYmm;
5798 /** Name (for logging). */
5799 const char *pszName;
5800} const g_aGstSimdShadowInfo[] =
5801{
5802#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5803 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5804 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5805 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5806 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5807 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5808 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5809 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5810 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5811 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5812 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5813 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5814 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5815 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5816 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5817 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5818 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5819 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5820#undef CPUMCTX_OFF_AND_SIZE
5821};
5822AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
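/* These offsets feed the SIMD load/store emitters below; e.g. flushing a dirty low half uses
       off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
                                                       g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
   while offYmm is used the same way for the upper 128 bits (YmmHi). */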
5823
5824
5825/**
5826 * Frees a temporary SIMD register.
5827 *
5828 * Any shadow copies of guest registers assigned to the host register will not
5829 * be flushed by this operation.
5830 */
5831DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5832{
5833 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5834 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5835 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5836 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5837 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5838}
5839
5840
5841/**
5842 * Emits code to flush a pending write of the given guest SIMD register (if any) and clears its dirty state.
5843 *
5844 * @returns New code buffer offset.
5845 * @param pReNative The native recompile state.
5846 * @param off Current code buffer position.
5847 * @param enmGstSimdReg The guest SIMD register to flush.
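 *
 * @note The low and high 128-bit halves are tracked separately (see
 *       IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128 / _HI_U128 below), so only
 *       the halves actually marked dirty are written back to CPUMCTX.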
5848 */
5849DECL_HIDDEN_THROW(uint32_t)
5850iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5851{
5852 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5853
5854 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5855 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5856 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5857 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5858
5859 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5860 {
5861 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5862 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5863 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5864 }
5865
5866 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5867 {
5868 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5869 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5870 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5871 }
5872
5873 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5874 return off;
5875}
5876
5877
5878/**
5879 * Flush the given set of guest SIMD registers if marked as dirty.
5880 *
5881 * @returns New code buffer offset.
5882 * @param pReNative The native recompile state.
5883 * @param off Current code buffer position.
5884 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
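 *
 * @note iemNativeRegFlushPendingWritesSlow() calls this with the complement of
 *       its exception mask, i.e. ~fGstSimdShwExcept.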
5885 */
5886DECL_HIDDEN_THROW(uint32_t)
5887iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5888{
5889 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5890 & fFlushGstSimdReg;
5891 if (bmGstSimdRegShadowDirty)
5892 {
5893# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5894 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5895 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5896# endif
5897
5898 uint32_t idxGstSimdReg = 0;
5899 do
5900 {
5901 if (bmGstSimdRegShadowDirty & 0x1)
5902 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5903
5904 idxGstSimdReg++;
5905 bmGstSimdRegShadowDirty >>= 1;
5906 } while (bmGstSimdRegShadowDirty);
5907 }
5908
5909 return off;
5910}
5911
5912
5913#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5914/**
5915 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5916 *
5917 * @returns New code buffer offset.
5918 * @param pReNative The native recompile state.
5919 * @param off Current code buffer position.
5920 * @param idxHstSimdReg The host SIMD register.
5921 *
5922 * @note This doesn't do any unshadowing of guest registers from the host register.
5923 */
5924DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5925{
5926 /* We need to flush any pending guest register writes this host register shadows. */
5927 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5928 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5929 if (bmGstSimdRegShadowDirty)
5930 {
5931# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5932 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5933 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5934# endif
5935
5936 uint32_t idxGstSimdReg = 0;
5937 do
5938 {
5939 if (bmGstSimdRegShadowDirty & 0x1)
5940 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5941
5942 idxGstSimdReg++;
5943 bmGstSimdRegShadowDirty >>= 1;
5944 } while (bmGstSimdRegShadowDirty);
5945 }
5946
5947 return off;
5948}
5949#endif
5950
5951
5952/**
5953 * Locate a register, possibly freeing one up.
5954 *
5955 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5956 * failed.
5957 *
5958 * @returns Host register number on success. Returns UINT8_MAX if no registers
5959 * are found; the caller is supposed to deal with this and raise an
5960 * allocation-type-specific status code (if desired).
5961 *
5962 * @throws VBox status code if we run into trouble spilling a variable or
5963 * recording debug info. Does NOT throw anything if we're out of
5964 * registers, though.
5965 */
5966static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5967 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5968{
5969 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5970 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5971 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5972
5973 /*
5974 * Try a freed register that's shadowing a guest register.
5975 */
5976 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5977 if (fRegs)
5978 {
5979 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5980
5981#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5982 /*
5983 * When we have liveness information, we use it to kick out all shadowed
5984 * guest registers that will not be needed any more in this TB. If we're
5985 * lucky, this may prevent us from ending up here again.
5986 *
5987 * Note! We must consider the previous entry here so we don't free
5988 * anything that the current threaded function requires (current
5989 * entry is produced by the next threaded function).
5990 */
5991 uint32_t const idxCurCall = pReNative->idxCurCall;
5992 if (idxCurCall > 0)
5993 {
5994 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5995
5996# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5997 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5998 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5999 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state. */
6000#else
6001 /* Construct a mask of the registers not in the read or write state.
6002 Note! We could skip writes, if they aren't from us, as this is just
6003 a hack to prevent trashing registers that have just been written
6004 or will be written when we retire the current instruction. */
6005 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
6006 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
6007 & IEMLIVENESSBIT_MASK;
6008#endif
6009 /* If it matches any shadowed registers. */
6010 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
6011 {
6012 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
6013 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
6014 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
6015
6016 /* See if we've got any unshadowed registers we can return now. */
6017 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
6018 if (fUnshadowedRegs)
6019 {
6020 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
6021 return (fPreferVolatile
6022 ? ASMBitFirstSetU32(fUnshadowedRegs)
6023 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
6024 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
6025 - 1;
6026 }
6027 }
6028 }
6029#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
6030
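 /* Pick the register to evict: with fPreferVolatile we take the lowest set bit;
    otherwise we take the highest candidate, restricting the search to the
    non-volatile subset whenever that subset is non-empty. */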
6031 unsigned const idxReg = (fPreferVolatile
6032 ? ASMBitFirstSetU32(fRegs)
6033 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6034 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
6035 - 1;
6036
6037 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
6038 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
6039 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
6040 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
6041
6042 /* We need to flush any pending guest register writes this host SIMD register shadows. */
6043 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
6044 uint32_t idxGstSimdReg = 0;
6045 do
6046 {
6047 if (fGstRegShadows & 0x1)
6048 {
6049 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
6050 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
6051 }
6052 idxGstSimdReg++;
6053 fGstRegShadows >>= 1;
6054 } while (fGstRegShadows);
6055
6056 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
6057 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
6058 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
6059 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6060 return idxReg;
6061 }
6062
6063 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
6064
6065 /*
6066 * Try free up a variable that's in a register.
6067 *
6068 * We do two rounds here, first evacuating variables that don't need to be
6069 * saved on the stack, then in the second round moving things to the stack.
6070 */
6071 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
6072 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
6073 {
6074 uint32_t fVars = pReNative->Core.bmVars;
6075 while (fVars)
6076 {
6077 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
6078 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
6079 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non-SIMD variables here. */
6080 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Must clear the bit here, or the while loop would never terminate. */
6081
6082 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
6083 && (RT_BIT_32(idxReg) & fRegMask)
6084 && ( iLoop == 0
6085 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
6086 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
6087 && !pReNative->Core.aVars[idxVar].fRegAcquired)
6088 {
6089 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
6090 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
6091 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
6092 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
6093 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
6094 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
6095
6096 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
6097 {
6098 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
6099 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
6100 }
6101
6102 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
6103 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
6104
6105 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
6106 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
6107 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
6108 return idxReg;
6109 }
6110 fVars &= ~RT_BIT_32(idxVar);
6111 }
6112 }
6113
6114 AssertFailed();
6115 return UINT8_MAX;
6116}
6117
6118
6119/**
6120 * Flushes a set of guest SIMD register shadow copies.
6121 *
6122 * This is usually done after calling a threaded function or a C-implementation
6123 * of an instruction.
6124 *
6125 * @param pReNative The native recompile state.
6126 * @param fGstSimdRegs Set of guest SIMD registers to flush.
6127 */
6128DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
6129{
6130 /*
6131 * Reduce the mask by what's currently shadowed
6132 */
6133 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
6134 fGstSimdRegs &= bmGstSimdRegShadows;
6135 if (fGstSimdRegs)
6136 {
6137 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
6138 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
6139 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
6140 if (bmGstSimdRegShadowsNew)
6141 {
6142 /*
6143 * Partial.
6144 */
6145 do
6146 {
6147 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
6148 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
6149 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
6150 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
6151 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
6152 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
6153
6154 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
6155 fGstSimdRegs &= ~fInThisHstReg;
6156 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
6157 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
6158 if (!fGstRegShadowsNew)
6159 {
6160 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6161 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6162 }
6163 } while (fGstSimdRegs != 0);
6164 }
6165 else
6166 {
6167 /*
6168 * Clear all.
6169 */
6170 do
6171 {
6172 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
6173 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
6174 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
6175 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
6176 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
6177 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
6178
6179 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
6180 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
6181 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6182 } while (fGstSimdRegs != 0);
6183 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
6184 }
6185 }
6186}
6187
6188
6189/**
6190 * Allocates a temporary host SIMD register.
6191 *
6192 * This may emit code to save register content onto the stack in order to free
6193 * up a register.
6194 *
6195 * @returns The host register number; throws VBox status code on failure,
6196 * so no need to check the return value.
6197 * @param pReNative The native recompile state.
6198 * @param poff Pointer to the variable with the code buffer position.
6199 * This will be update if we need to move a variable from
6200 * This will be updated if we need to move a variable from
6201 * @param fPreferVolatile Whether to prefer volatile over non-volatile
6202 * registers (@c true, default) or the other way around
6203 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
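 *
 * Typical usage (illustrative sketch): allocate, emit code using the returned
 * register, then release it again:
 * @code
 *      uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      // ... emit instructions using idxSimdTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 * @endcode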
6204 */
6205DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
6206{
6207 /*
6208 * Try find a completely unused register, preferably a call-volatile one.
6209 */
6210 uint8_t idxSimdReg;
6211 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
6212 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6213 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
6214 if (fRegs)
6215 {
6216 if (fPreferVolatile)
6217 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6218 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6219 else
6220 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6221 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6222 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
6223 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
6224
6225 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6226 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6227 }
6228 else
6229 {
6230 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
6231 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
6232 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6233 }
6234
6235 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
6236 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
6237}
6238
6239
6240/**
6241 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
6242 * registers.
6243 *
6244 * @returns The host register number; throws VBox status code on failure,
6245 * so no need to check the return value.
6246 * @param pReNative The native recompile state.
6247 * @param poff Pointer to the variable with the code buffer position.
6248 * This will be update if we need to move a variable from
6249 * This will be updated if we need to move a variable from
6250 * @param fRegMask Mask of acceptable registers.
6251 * @param fPreferVolatile Whether to prefer volatile over non-volatile
6252 * registers (@c true, default) or the other way around
6253 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
6254 */
6255DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
6256 bool fPreferVolatile /*= true*/)
6257{
6258 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
6259 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
6260
6261 /*
6262 * Try find a completely unused register, preferably a call-volatile one.
6263 */
6264 uint8_t idxSimdReg;
6265 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
6266 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6267 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
6268 & fRegMask;
6269 if (fRegs)
6270 {
6271 if (fPreferVolatile)
6272 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6273 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6274 else
6275 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6276 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6277 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
6278 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
6279
6280 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6281 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6282 }
6283 else
6284 {
6285 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
6286 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
6287 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6288 }
6289
6290 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
6291 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
6292}
6293
6294
6295/**
6296 * Sets the indicator for which part of the given SIMD register has valid data loaded.
6297 *
6298 * @param pReNative The native recompile state.
6299 * @param idxHstSimdReg The host SIMD register to update the state for.
6300 * @param enmLoadSz The load size to set.
6301 */
6302DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
6303 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6304{
6305 /* Everything valid already? -> nothing to do. */
6306 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
6307 return;
6308
6309 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
6310 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
6311 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
6312 {
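 /* The other 128-bit half is being loaded while the opposite half is already
    valid (see the assertion below), so the whole 256-bit value becomes valid. */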
6313 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
6314 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6315 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
6316 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
6317 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
6318 }
6319}
6320
6321
6322static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
6323 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
6324{
6325 /* Easy case first: either the destination loads the same range the source has already loaded, or the source has loaded everything. */
6326 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
6327 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
6328 {
6329# ifdef RT_ARCH_ARM64
6330 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
6331 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
6332# endif
6333
6334 if (idxHstSimdRegDst != idxHstSimdRegSrc)
6335 {
6336 switch (enmLoadSzDst)
6337 {
6338 case kIemNativeGstSimdRegLdStSz_256:
6339 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6340 break;
6341 case kIemNativeGstSimdRegLdStSz_Low128:
6342 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6343 break;
6344 case kIemNativeGstSimdRegLdStSz_High128:
6345 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6346 break;
6347 default:
6348 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6349 }
6350
6351 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
6352 }
6353 }
6354 else
6355 {
6356 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
6357 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
6358 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
6359 }
6360
6361 return off;
6362}
6363
6364
6365/**
6366 * Allocates a temporary host SIMD register for keeping a guest
6367 * SIMD register value.
6368 *
6369 * Since we may already have a register holding the guest register value,
6370 * code will be emitted to do the loading if that's not the case. Code may also
6371 * be emitted if we have to free up a register to satisfy the request.
6372 *
6373 * @returns The host register number; throws VBox status code on failure, so no
6374 * need to check the return value.
6375 * @param pReNative The native recompile state.
6376 * @param poff Pointer to the variable with the code buffer
6377 * position. This will be updated if we need to move a
6378 * variable from register to stack in order to satisfy
6379 * the request.
6380 * @param enmGstSimdReg The guest SIMD register that is to be updated.
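 * @param enmLoadSz Which part of the register needs to be valid:
 *                  the low 128 bits, the high 128 bits or the
 *                  full 256 bits.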
6381 * @param enmIntendedUse How the caller will be using the host register.
6382 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
6383 * register is okay (default). The ASSUMPTION here is
6384 * that the caller has already flushed all volatile
6385 * registers, so this is only applied if we allocate a
6386 * new register.
6387 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
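 *
 * Example (illustrative): fetching the full 256-bit ymm0 value for updating
 * could look like this:
 * @code
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                         IEMNATIVEGSTSIMDREG_SIMD(0),
 *                                                                         kIemNativeGstSimdRegLdStSz_256,
 *                                                                         kIemNativeGstRegUse_ForUpdate);
 * @endcode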
6388 */
6389DECL_HIDDEN_THROW(uint8_t)
6390iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6391 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
6392 bool fNoVolatileRegs /*= false*/)
6393{
6394 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
6395#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
6396 AssertMsg( pReNative->idxCurCall == 0
6397 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6398 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
6399 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
6400 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
6401 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
6402 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
6403#endif
6404#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
6405 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
6406#endif
6407 uint32_t const fRegMask = !fNoVolatileRegs
6408 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
6409 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
6410
6411 /*
6412 * First check if the guest register value is already in a host register.
6413 */
6414 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
6415 {
6416 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
6417 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
6418 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
6419 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
6420
6421 /* It's not supposed to be allocated... */
6422 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
6423 {
6424 /*
6425 * If the register will trash the guest shadow copy, try find a
6426 * completely unused register we can use instead. If that fails,
6427 * we need to disassociate the host reg from the guest reg.
6428 */
6429 /** @todo would be nice to know if preserving the register is in any way helpful. */
6430 /* If the purpose is calculations, try duplicate the register value as
6431 we'll be clobbering the shadow. */
6432 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
6433 && ( ~pReNative->Core.bmHstSimdRegs
6434 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6435 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
6436 {
6437 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
6438
6439 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6440
6441 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
6442 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6443 g_apszIemNativeHstSimdRegNames[idxRegNew]));
6444 idxSimdReg = idxRegNew;
6445 }
6446 /* If the current register matches the restrictions, go ahead and allocate
6447 it for the caller. */
6448 else if (fRegMask & RT_BIT_32(idxSimdReg))
6449 {
6450 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
6451 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
6452 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6453 {
6454 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6455 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
6456 else
6457 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
6458 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
6459 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6460 }
6461 else
6462 {
6463 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
6464 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
6465 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
6466 }
6467 }
6468 /* Otherwise, allocate a register that satisfies the caller and transfer
6469 the shadowing if compatible with the intended use. (This basically
6470 means the call wants a non-volatile register (RSP push/pop scenario).) */
6471 else
6472 {
6473 Assert(fNoVolatileRegs);
6474 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
6475 !fNoVolatileRegs
6476 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
6477 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6478 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6479 {
6480 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
6481 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
6482 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
6483 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6484 }
6485 else
6486 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
6487 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6488 g_apszIemNativeHstSimdRegNames[idxRegNew]));
6489 idxSimdReg = idxRegNew;
6490 }
6491 }
6492 else
6493 {
6494 /*
6495 * Oops. Shadowed guest register already allocated!
6496 *
6497 * Allocate a new register, copy the value and, if updating, the
6498 * guest shadow copy assignment to the new register.
6499 */
6500 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
6501 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
6502 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
6503 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
6504
6505 /** @todo share register for readonly access. */
6506 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
6507 enmIntendedUse == kIemNativeGstRegUse_Calculation);
6508
6509 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6510 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6511 else
6512 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6513
6514 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
6515 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6516 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
6517 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6518 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
6519 else
6520 {
6521 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
6522 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
6523 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6524 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
6525 }
6526 idxSimdReg = idxRegNew;
6527 }
6528 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
6529
6530#ifdef VBOX_STRICT
6531 /* Strict builds: Check that the value is correct. */
6532 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6533 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
6534#endif
6535
6536 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6537 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
6538 {
6539# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6540 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
6541 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
6542# endif
6543
6544 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
6545 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6546 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6547 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6548 else
6549 {
6550 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
6551 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6552 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6553 }
6554 }
6555
6556 return idxSimdReg;
6557 }
6558
6559 /*
6560 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register (unless this is for a destructive calculation).
6561 */
6562 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
6563
6564 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6565 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
6566 else
6567 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6568
6569 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6570 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
6571
6572 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6573 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
6574 {
6575# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6576 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
6577 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
6578# endif
6579
6580 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
6581 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6582 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6583 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6584 else
6585 {
6586 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
6587 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6588 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6589 }
6590 }
6591
6592 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
6593 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6594
6595 return idxRegNew;
6596}
6597
6598
6599/**
6600 * Flushes guest SIMD register shadow copies held by a set of host registers.
6601 * This is used whenever calling an external helper, to ensure that we don't carry on
6602 * with any guest shadows in volatile registers, as these will get clobbered by the callee.
6603 * with any guest shadows in volatile registers, as these will get corrupted by the caller.
6604 *
6605 * @param pReNative The native recompile state.
6606 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
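 *
 * @note Callers presumably pass IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK here,
 *       matching the set of registers the called helper is free to clobber.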
6607 */
6608DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
6609{
6610 /*
6611 * Reduce the mask by what's currently shadowed.
6612 */
6613 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
6614 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
6615 if (fHstSimdRegs)
6616 {
6617 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
6618 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
6619 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
6620 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
6621 if (bmHstSimdRegsWithGstShadowNew)
6622 {
6623 /*
6624 * Partial (likely).
6625 */
6626 uint64_t fGstShadows = 0;
6627 do
6628 {
6629 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
6630 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
6631 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
6632 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
6633 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
6634 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
6635
6636 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
6637 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
6638 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
6639 } while (fHstSimdRegs != 0);
6640 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
6641 }
6642 else
6643 {
6644 /*
6645 * Clear all.
6646 */
6647 do
6648 {
6649 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
6650 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
6651 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
6652 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
6653 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
6654 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
6655
6656 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
6657 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
6658 } while (fHstSimdRegs != 0);
6659 pReNative->Core.bmGstSimdRegShadows = 0;
6660 }
6661 }
6662}
6663#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6664
6665
6666
6667/*********************************************************************************************************************************
6668* Code emitters for flushing pending guest register writes and sanity checks *
6669*********************************************************************************************************************************/
6670
6671#ifdef VBOX_STRICT
6672/**
6673 * Does internal register allocator sanity checks.
6674 */
6675DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6676{
6677 /*
6678 * Iterate host registers building a guest shadowing set.
6679 */
6680 uint64_t bmGstRegShadows = 0;
6681 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6682 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6683 while (bmHstRegsWithGstShadow)
6684 {
6685 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6686 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6687 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6688
6689 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6690 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6691 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6692 bmGstRegShadows |= fThisGstRegShadows;
6693 while (fThisGstRegShadows)
6694 {
6695 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6696 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6697 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6698 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6699 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6700 }
6701 }
6702 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6703 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6704 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6705
6706 /*
6707 * Now the other way around, checking the guest to host index array.
6708 */
6709 bmHstRegsWithGstShadow = 0;
6710 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6711 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6712 while (bmGstRegShadows)
6713 {
6714 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6715 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6716 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6717
6718 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6719 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6720 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6721 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6722 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6723 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6724 }
6725 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6726 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6727 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6728}
6729#endif /* VBOX_STRICT */
6730
6731
6732/**
6733 * Flushes any delayed guest register writes.
6734 *
6735 * This must be called prior to calling CImpl functions and any helpers that use
6736 * the guest state (like raising exceptions) and such.
6737 *
6738 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
6739 * the caller if it wishes to do so.
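 *
 * @param pReNative         The native recompile state.
 * @param off               Current code buffer position.
 * @param fGstShwExcept     Mask of guest registers to exclude from flushing
 *                          (their dirty state is left pending).
 * @param fGstSimdShwExcept Ditto for guest SIMD registers.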
6740 */
6741DECL_HIDDEN_THROW(uint32_t)
6742iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
6743{
6744#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6745 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
6746 off = iemNativeEmitPcWriteback(pReNative, off);
6747#else
6748 RT_NOREF(pReNative, fGstShwExcept);
6749#endif
6750
6751#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6752 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
6753#endif
6754
6755#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6756 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
6757#endif
6758
6759 return off;
6760}
6761
6762
6763#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6764/**
6765 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
6766 */
6767DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6768{
6769 Assert(pReNative->Core.offPc);
6770# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6771 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6772 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
6773# endif
6774
6775# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6776 /* Allocate a temporary PC register. */
6777 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6778
6779 /* Perform the addition and store the result. */
6780 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6781 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6782
6783 /* Free but don't flush the PC register. */
6784 iemNativeRegFreeTmp(pReNative, idxPcReg);
6785# else
6786 /* Compare the shadow with the context value, they should match. */
6787 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6788 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6789# endif
6790
6791 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
6792 pReNative->Core.offPc = 0;
6793 pReNative->Core.cInstrPcUpdateSkipped = 0;
6794
6795 return off;
6796}
6797#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6798
6799
6800/*********************************************************************************************************************************
6801* Code Emitters (larger snippets) *
6802*********************************************************************************************************************************/
6803
6804/**
6805 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6806 * extending to 64-bit width.
6807 *
6808 * @returns New code buffer offset on success, UINT32_MAX on failure.
6809 * @param pReNative The native recompile state.
6810 * @param off The current code buffer position.
6811 * @param idxHstReg The host register to load the guest register value into.
6812 * @param enmGstReg The guest register to load.
6813 *
6814 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6815 * that is something the caller needs to do if applicable.
6816 */
6817DECL_HIDDEN_THROW(uint32_t)
6818iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6819{
6820 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6821 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6822
6823 switch (g_aGstShadowInfo[enmGstReg].cb)
6824 {
6825 case sizeof(uint64_t):
6826 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6827 case sizeof(uint32_t):
6828 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6829 case sizeof(uint16_t):
6830 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6831#if 0 /* not present in the table. */
6832 case sizeof(uint8_t):
6833 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6834#endif
6835 default:
6836 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6837 }
6838}
6839
6840
6841#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6842/**
6843 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6844 *
6845 * @returns New code buffer offset on success, UINT32_MAX on failure.
6846 * @param pReNative The recompiler state.
6847 * @param off The current code buffer position.
6848 * @param idxHstSimdReg The host register to load the guest register value into.
6849 * @param enmGstSimdReg The guest register to load.
6850 * @param enmLoadSz The load size of the register.
6851 *
6852 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6853 * that is something the caller needs to do if applicable.
6854 */
6855DECL_HIDDEN_THROW(uint32_t)
6856iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6857 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6858{
6859 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6860
6861 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6862 switch (enmLoadSz)
6863 {
6864 case kIemNativeGstSimdRegLdStSz_256:
6865 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6866 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6867 case kIemNativeGstSimdRegLdStSz_Low128:
6868 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6869 case kIemNativeGstSimdRegLdStSz_High128:
6870 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6871 default:
6872 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6873 }
6874}
6875#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6876
6877#ifdef VBOX_STRICT
6878
6879/**
6880 * Emits code that checks that the value of @a idxReg is UINT32_MAX or less.
6881 *
6882 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6883 * Trashes EFLAGS on AMD64.
6884 */
6885DECL_HIDDEN_THROW(uint32_t)
6886iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6887{
6888# ifdef RT_ARCH_AMD64
6889 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6890
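 /* Strategy: rotate the upper 32 bits into the low half, test them, trap with an
    int3 if any bit is set, then rotate back to restore the original value. */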
6891 /* rol reg64, 32 */
6892 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6893 pbCodeBuf[off++] = 0xc1;
6894 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6895 pbCodeBuf[off++] = 32;
6896
6897 /* test reg32, ffffffffh */
6898 if (idxReg >= 8)
6899 pbCodeBuf[off++] = X86_OP_REX_B;
6900 pbCodeBuf[off++] = 0xf7;
6901 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6902 pbCodeBuf[off++] = 0xff;
6903 pbCodeBuf[off++] = 0xff;
6904 pbCodeBuf[off++] = 0xff;
6905 pbCodeBuf[off++] = 0xff;
6906
6907 /* je/jz +1 */
6908 pbCodeBuf[off++] = 0x74;
6909 pbCodeBuf[off++] = 0x01;
6910
6911 /* int3 */
6912 pbCodeBuf[off++] = 0xcc;
6913
6914 /* rol reg64, 32 */
6915 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6916 pbCodeBuf[off++] = 0xc1;
6917 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6918 pbCodeBuf[off++] = 32;
6919
6920# elif defined(RT_ARCH_ARM64)
6921 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6922 /* lsr tmp0, reg64, #32 */
6923 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6924 /* cbz tmp0, +1 */
6925 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6926 /* brk #0x1100 */
6927 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6928
6929# else
6930# error "Port me!"
6931# endif
6932 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6933 return off;
6934}
6935
6936
6937/**
6938 * Emits code that checks that the content of register @a idxReg is the same
6939 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6940 * instruction if that's not the case.
6941 *
6942 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6943 * Trashes EFLAGS on AMD64.
6944 */
6945DECL_HIDDEN_THROW(uint32_t)
6946iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6947{
6948#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6949 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6950 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6951 return off;
6952#endif
6953
6954# ifdef RT_ARCH_AMD64
6955 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6956
6957 /* cmp reg, [mem] */
6958 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6959 {
6960 if (idxReg >= 8)
6961 pbCodeBuf[off++] = X86_OP_REX_R;
6962 pbCodeBuf[off++] = 0x38;
6963 }
6964 else
6965 {
6966 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6967 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6968 else
6969 {
6970 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6971 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6972 else
6973 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6974 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6975 if (idxReg >= 8)
6976 pbCodeBuf[off++] = X86_OP_REX_R;
6977 }
6978 pbCodeBuf[off++] = 0x39;
6979 }
6980 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6981
6982 /* je/jz +1 */
6983 pbCodeBuf[off++] = 0x74;
6984 pbCodeBuf[off++] = 0x01;
6985
6986 /* int3 */
6987 pbCodeBuf[off++] = 0xcc;
6988
6989 /* For values smaller than the register size, we must check that the rest
6990 of the register is all zeros. */
6991 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6992 {
6993 /* test reg64, imm32 */
6994 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6995 pbCodeBuf[off++] = 0xf7;
6996 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6997 pbCodeBuf[off++] = 0;
6998 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6999 pbCodeBuf[off++] = 0xff;
7000 pbCodeBuf[off++] = 0xff;
7001
7002 /* je/jz +1 */
7003 pbCodeBuf[off++] = 0x74;
7004 pbCodeBuf[off++] = 0x01;
7005
7006 /* int3 */
7007 pbCodeBuf[off++] = 0xcc;
7008 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7009 }
7010 else
7011 {
7012 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7013 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
7014 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
7015 }
7016
7017# elif defined(RT_ARCH_ARM64)
7018 /* mov TMP0, [gstreg] */
7019 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
7020
7021 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7022 /* sub tmp0, tmp0, idxReg */
7023 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
7024 /* cbz tmp0, +1 */
7025 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
7026 /* brk #0x1000+enmGstReg */
7027 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
7028 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7029
7030# else
7031# error "Port me!"
7032# endif
7033 return off;
7034}
7035
7036
7037# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7038# ifdef RT_ARCH_AMD64
7039/**
7040 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
7041 */
7042 DECL_FORCE_INLINE_THROW(uint32_t)
iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
7043{
7044 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
7045 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7046 if (idxSimdReg >= 8)
7047 pbCodeBuf[off++] = X86_OP_REX_R;
7048 pbCodeBuf[off++] = 0x0f;
7049 pbCodeBuf[off++] = 0x38;
7050 pbCodeBuf[off++] = 0x29;
7051 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
7052
7053 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
7054 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7055 pbCodeBuf[off++] = X86_OP_REX_W
7056 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
7057 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7058 pbCodeBuf[off++] = 0x0f;
7059 pbCodeBuf[off++] = 0x3a;
7060 pbCodeBuf[off++] = 0x16;
7061 pbCodeBuf[off++] = 0xeb;
7062 pbCodeBuf[off++] = 0x00;
7063
7064 /* cmp tmp0, 0xffffffffffffffff. */
7065 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7066 pbCodeBuf[off++] = 0x83;
7067 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
7068 pbCodeBuf[off++] = 0xff;
7069
7070 /* je/jz +1 */
7071 pbCodeBuf[off++] = 0x74;
7072 pbCodeBuf[off++] = 0x01;
7073
7074 /* int3 */
7075 pbCodeBuf[off++] = 0xcc;
7076
7077 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
7078 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
7079 pbCodeBuf[off++] = X86_OP_REX_W
7080 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
7081 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7082 pbCodeBuf[off++] = 0x0f;
7083 pbCodeBuf[off++] = 0x3a;
7084 pbCodeBuf[off++] = 0x16;
7085 pbCodeBuf[off++] = 0xeb;
7086 pbCodeBuf[off++] = 0x01;
7087
7088 /* cmp tmp0, 0xffffffffffffffff. */
7089 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
7090 pbCodeBuf[off++] = 0x83;
7091 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
7092 pbCodeBuf[off++] = 0xff;
7093
7094 /* je/jz +1 */
7095 pbCodeBuf[off++] = 0x74;
7096 pbCodeBuf[off++] = 0x01;
7097
7098 /* int3 */
7099 pbCodeBuf[off++] = 0xcc;
7100
7101 return off;
7102}
7103# endif
7104
7105
7106/**
7107 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
7108 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
7109 * instruction if that's not the case.
7110 *
7111 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
7112 * Trashes EFLAGS on AMD64.
7113 */
7114DECL_HIDDEN_THROW(uint32_t)
7115iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
7116 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
7117{
7118 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
7119 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
7120 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
7121 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
7122 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
7123 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
7124 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
7125 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
7126 return off;
7127
7128# ifdef RT_ARCH_AMD64
7129 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7130 {
7131 /* movdqa vectmp0, idxSimdReg */
7132 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
7133
7134 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
7135
7136 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7137 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
7138 }
7139
7140 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7141 {
7142 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
7143 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
7144
7145 /* vextracti128 vectmp0, idxSimdReg, 1 */
7146 pbCodeBuf[off++] = X86_OP_VEX3;
7147 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
7148 | X86_OP_VEX3_BYTE1_X
7149 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
7150 | 0x03; /* Opcode map */
7151 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
7152 pbCodeBuf[off++] = 0x39;
7153 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
7154 pbCodeBuf[off++] = 0x01;
7155
7156 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7157 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
7158 }
7159# elif defined(RT_ARCH_ARM64)
7160 /* mov vectmp0, [gstreg] */
7161 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
7162
7163 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7164 {
7165 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
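        /* The comparison trick: XOR the two vectors and sum all result bytes (uaddlv);
           the sum is zero if and only if all 128 bits match. */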
7166 /* eor vectmp0, vectmp0, idxSimdReg */
7167 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
7168 /* uaddlv vectmp0, vectmp0.16B */
7169 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
7170 /* umov tmp0, vectmp0.H[0] */
7171 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7172 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
7173 /* cbz tmp0, +1 */
7174 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
7175 /* brk #0x1000+enmGstReg */
7176 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
7177 }
7178
7179 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7180 {
7181 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
7182 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
7183 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
7184 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
7185 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
7186 /* umov tmp0, (vectmp0 + 1).H[0] */
7187 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
7188 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
7189 /* cbz tmp0, +1 */
7190 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
7191 /* brk #0x1000+enmGstReg */
7192 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
7193 }
7194
7195# else
7196# error "Port me!"
7197# endif
7198
7199 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7200 return off;
7201}
7202# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
7203
7204
7205/**
7206 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
7207 * important bits.
7208 *
7209 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
7210 * Trashes EFLAGS on AMD64.
7211 */
7212DECL_HIDDEN_THROW(uint32_t)
7213iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
7214{
7215 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
7216 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
7217 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
7218 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
7219
7220#ifdef RT_ARCH_AMD64
7221 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7222
7223 /* je/jz +1 */
7224 pbCodeBuf[off++] = 0x74;
7225 pbCodeBuf[off++] = 0x01;
7226
7227 /* int3 */
7228 pbCodeBuf[off++] = 0xcc;
7229
7230# elif defined(RT_ARCH_ARM64)
7231 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7232
7233 /* b.eq +1 */
7234 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
7235 /* brk #0x2000 */
7236 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
7237
7238# else
7239# error "Port me!"
7240# endif
7241 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7242
7243 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7244 return off;
7245}
7246
7247#endif /* VBOX_STRICT */
7248
7249
7250#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
7251/**
7252 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
7253 */
7254DECL_HIDDEN_THROW(uint32_t)
7255iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
7256{
7257 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
7258
7259 fEflNeeded &= X86_EFL_STATUS_BITS;
7260 if (fEflNeeded)
7261 {
7262# ifdef RT_ARCH_AMD64
7263 /* test dword [pVCpu + offVCpu], imm32 */
7264 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7265 if (fEflNeeded <= 0xff)
7266 {
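        /* All the needed status flags fit in the low byte, so a byte-sized TEST (0xf6 /0)
           with an 8-bit immediate is sufficient. */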
7267 pCodeBuf[off++] = 0xf6;
7268 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
7269 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
7270 }
7271 else
7272 {
7273 pCodeBuf[off++] = 0xf7;
7274 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
7275 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
7276 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
7277 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
7278 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
7279 }
7280 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7281
7282# else
7283 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
7284 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
7285 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
7286# ifdef RT_ARCH_ARM64
7287 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
7288 off = iemNativeEmitBrk(pReNative, off, 0x7777);
7289# else
7290# error "Port me!"
7291# endif
7292 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7293# endif
7294 }
7295 return off;
7296}
7297#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
7298
7299
7300/**
7301 * Emits code for checking the return code of a call and rcPassUp, returning
7302 * from the code if either is non-zero.
7303 */
7304DECL_HIDDEN_THROW(uint32_t)
7305iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7306{
7307#ifdef RT_ARCH_AMD64
7308 /*
7309 * AMD64: eax = call status code.
7310 */
7311
7312 /* edx = rcPassUp */
7313 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
7314# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7315 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
7316# endif
7317
7318 /* edx = eax | rcPassUp */
7319 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7320 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
7321 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
7322 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7323
7324 /* Jump to non-zero status return path. */
7325 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
7326
7327 /* done. */
7328
7329#elif RT_ARCH_ARM64
7330 /*
7331 * ARM64: w0 = call status code.
7332 */
7333# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7334 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
7335# endif
7336 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
7337
7338 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7339
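    /* orr w4, w3, w0 ; combine the call status (w0) and rcPassUp (w3); any non-zero
       result sends us down the NonZeroRetOrPassUp path below. */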
7340 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
7341
7342 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7343 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7344 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
7345
7346#else
7347# error "port me"
7348#endif
7349 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7350 RT_NOREF_PV(idxInstr);
7351 return off;
7352}
7353
7354
7355/**
7356 * Emits code to check if the content of @a idxAddrReg is a canonical address,
7357 * raising a \#GP(0) if it isn't.
7358 *
7359 * @returns New code buffer offset; throws VBox status code on error.
7360 * @param pReNative The native recompile state.
7361 * @param off The code buffer offset.
7362 * @param idxAddrReg The host register with the address to check.
7363 * @param idxInstr The current instruction.
7364 */
7365DECL_HIDDEN_THROW(uint32_t)
7366iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
7367{
7368 /*
7369 * Make sure we don't have any outstanding guest register writes as we may
7370 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
7371 */
7372 off = iemNativeRegFlushPendingWrites(pReNative, off);
7373
7374#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7375 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7376#else
7377 RT_NOREF(idxInstr);
7378#endif
7379
7380#ifdef RT_ARCH_AMD64
7381 /*
7382 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
7383 * return raisexcpt();
7384 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
7385 */
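    /* Worked example: canonical 0xffff800000000000 gives hi32=0xffff8000, +0x8000 wraps to 0, so the check passes;
       non-canonical 0x0000800000000000 gives hi32=0x00008000, +0x8000 = 0x10000, >>16 = 1, so we raise #GP(0). */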
7386 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7387
7388 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
7389 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
7390 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
7391 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
7392 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7393
7394 iemNativeRegFreeTmp(pReNative, iTmpReg);
7395
7396#elif defined(RT_ARCH_ARM64)
7397 /*
7398 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
7399 * return raisexcpt();
7400 * ----
7401 * mov x1, 0x800000000000
7402 * add x1, x0, x1
7403 * cmp xzr, x1, lsr 48
7404 * b.ne .Lraisexcpt
7405 */
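    /* Worked example: canonical 0xffff800000000000 + 0x800000000000 wraps to 0, so lsr #48 yields 0;
       non-canonical 0x0000800000000000 + 0x800000000000 = 0x0001000000000000, lsr #48 yields 1 -> #GP(0). */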
7406 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7407
7408 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
7409 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
7410 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
7411 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7412
7413 iemNativeRegFreeTmp(pReNative, iTmpReg);
7414
7415#else
7416# error "Port me"
7417#endif
7418 return off;
7419}
7420
7421
7422/**
7423 * Emits code to check that the content of @a idxAddrReg is within the limit
7424 * of CS, raising a \#GP(0) if it isn't.
7425 *
7426 * @returns New code buffer offset; throws VBox status code on error.
7427 * @param pReNative The native recompile state.
7428 * @param off The code buffer offset.
7429 * @param idxAddrReg The host register (32-bit) with the address to
7430 * check.
7431 * @param idxInstr The current instruction.
7432 */
7433DECL_HIDDEN_THROW(uint32_t)
7434iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7435 uint8_t idxAddrReg, uint8_t idxInstr)
7436{
7437 /*
7438 * Make sure we don't have any outstanding guest register writes as we may
7439 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
7440 */
7441 off = iemNativeRegFlushPendingWrites(pReNative, off);
7442
7443#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7444 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7445#else
7446 RT_NOREF(idxInstr);
7447#endif
7448
7449 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
7450 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
7451 kIemNativeGstRegUse_ReadOnly);
7452
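    /* cmp idxAddrReg, CS.limit; ja RaiseGp0 - i.e. raise #GP(0) when the (unsigned) 32-bit address is above the limit. */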
7453 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
7454 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7455
7456 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
7457 return off;
7458}
7459
7460
7461/**
7462 * Emits a call to a CImpl function or something similar.
7463 */
7464DECL_HIDDEN_THROW(uint32_t)
7465iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
7466 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
7467{
7468 /* Writeback everything. */
7469 off = iemNativeRegFlushPendingWrites(pReNative, off);
7470
7471 /*
7472 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
7473 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
7474 */
7475 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
7476 fGstShwFlush
7477 | RT_BIT_64(kIemNativeGstReg_Pc)
7478 | RT_BIT_64(kIemNativeGstReg_EFlags));
7479 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7480
7481 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
7482
7483 /*
7484 * Load the parameters.
7485 */
7486#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
7487 /* Special handling for the hidden VBOXSTRICTRC return pointer. */
7488 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7489 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
7490 if (cAddParams > 0)
7491 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
7492 if (cAddParams > 1)
7493 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
7494 if (cAddParams > 2)
7495 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
7496 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7497
7498#else
7499 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7500 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7501 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
7502 if (cAddParams > 0)
7503 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
7504 if (cAddParams > 1)
7505 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
7506 if (cAddParams > 2)
7507# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
7508 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
7509# else
7510 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
7511# endif
7512#endif
7513
7514 /*
7515 * Make the call.
7516 */
7517 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
7518
7519#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
7520 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7521#endif
7522
7523 /*
7524 * Check the status code.
7525 */
7526 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7527}
7528
7529
7530/**
7531 * Emits a call to a threaded worker function.
7532 */
7533DECL_HIDDEN_THROW(uint32_t)
7534iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
7535{
7536 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7537
7538 /* We don't know what the threaded function is doing so we must flush all pending writes. */
7539 off = iemNativeRegFlushPendingWrites(pReNative, off);
7540
7541 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
7542 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
7543
7544#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7545 /* The threaded function may throw / long jmp, so set current instruction
7546 number if we're counting. */
7547 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7548#endif
7549
7550 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
7551
7552#ifdef RT_ARCH_AMD64
7553 /* Load the parameters and emit the call. */
7554# ifdef RT_OS_WINDOWS
7555# ifndef VBOXSTRICTRC_STRICT_ENABLED
7556 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7557 if (cParams > 0)
7558 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
7559 if (cParams > 1)
7560 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
7561 if (cParams > 2)
7562 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
7563# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
7564 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
7565 if (cParams > 0)
7566 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
7567 if (cParams > 1)
7568 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
7569 if (cParams > 2)
7570 {
7571 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
7572 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
7573 }
7574 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7575# endif /* VBOXSTRICTRC_STRICT_ENABLED */
7576# else
7577 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7578 if (cParams > 0)
7579 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
7580 if (cParams > 1)
7581 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
7582 if (cParams > 2)
7583 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
7584# endif
7585
7586 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
7587
7588# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
7589 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7590# endif
7591
7592#elif RT_ARCH_ARM64
7593 /*
7594 * ARM64:
7595 */
7596 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7597 if (cParams > 0)
7598 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
7599 if (cParams > 1)
7600 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
7601 if (cParams > 2)
7602 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
7603
7604 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
7605
7606#else
7607# error "port me"
7608#endif
7609
7610 /*
7611 * Check the status code.
7612 */
7613 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
7614
7615 return off;
7616}
7617
7618#ifdef VBOX_WITH_STATISTICS
7619/**
7620 * Emits code to update the thread call statistics.
7621 */
7622DECL_INLINE_THROW(uint32_t)
7623iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
7624{
7625 /*
7626 * Update threaded function stats.
7627 */
7628 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
7629 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
7630# if defined(RT_ARCH_ARM64)
7631 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
7632 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
7633 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
7634 iemNativeRegFreeTmp(pReNative, idxTmp1);
7635 iemNativeRegFreeTmp(pReNative, idxTmp2);
7636# else
7637 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
7638# endif
7639 return off;
7640}
7641#endif /* VBOX_WITH_STATISTICS */
7642
7643
7644/**
7645 * Emits the code at the ReturnWithFlags label (returns
7646 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7647 */
7648static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7649{
7650 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7651 if (idxLabel != UINT32_MAX)
7652 {
7653 iemNativeLabelDefine(pReNative, idxLabel, off);
7654
7655 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7656
7657 /* jump back to the return sequence. */
7658 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7659 }
7660 return off;
7661}
7662
7663
7664/**
7665 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7666 */
7667static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7668{
7669 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7670 if (idxLabel != UINT32_MAX)
7671 {
7672 iemNativeLabelDefine(pReNative, idxLabel, off);
7673
7674 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7675
7676 /* jump back to the return sequence. */
7677 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7678 }
7679 return off;
7680}
7681
7682
7683/**
7684 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7685 */
7686static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7687{
7688 /*
7689 * Generate the rc + rcPassUp fiddling code if needed.
7690 */
7691 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7692 if (idxLabel != UINT32_MAX)
7693 {
7694 iemNativeLabelDefine(pReNative, idxLabel, off);
7695
7696 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
7697#ifdef RT_ARCH_AMD64
7698# ifdef RT_OS_WINDOWS
7699# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7700 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7701# endif
7702 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7703 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7704# else
7705 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7706 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7707# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7708 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7709# endif
7710# endif
7711# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7712 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7713# endif
7714
7715#else
7716 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7717 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7718 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7719#endif
7720
7721 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7722 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7723 }
7724 return off;
7725}
7726
7727
7728/**
7729 * Emits a standard epilog.
7730 */
7731static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7732{
7733 *pidxReturnLabel = UINT32_MAX;
7734
7735 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7736 off = iemNativeRegFlushPendingWrites(pReNative, off);
7737
7738 /*
7739 * Successful return, so clear the return register (eax, w0).
7740 */
7741 off = iemNativeEmitGprZero(pReNative,off, IEMNATIVE_CALL_RET_GREG);
7742
7743 /*
7744 * Define label for common return point.
7745 */
7746 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7747 *pidxReturnLabel = idxReturn;
7748
7749 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7750
7751 /*
7752 * Restore registers and return.
7753 */
7754#ifdef RT_ARCH_AMD64
7755 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7756
7757 /* Reposition esp at the r15 restore point. */
7758 pbCodeBuf[off++] = X86_OP_REX_W;
7759 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7760 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7761 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7762
7763 /* Pop non-volatile registers and return */
7764 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7765 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7766 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7767 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7768 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7769 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7770 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7771 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7772# ifdef RT_OS_WINDOWS
7773 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7774 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7775# endif
7776 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7777 pbCodeBuf[off++] = 0xc9; /* leave */
7778 pbCodeBuf[off++] = 0xc3; /* ret */
7779 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7780
7781#elif RT_ARCH_ARM64
7782 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7783
7784 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
7785 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7786 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7787 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7788 IEMNATIVE_FRAME_VAR_SIZE / 8);
7789 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7790 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7791 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7792 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7793 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7794 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7795 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7796 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7797 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7798 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7799 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7800 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7801
7802 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7803 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7804 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7805 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7806
7807 /* retab / ret */
7808# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7809 if (1)
7810 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7811 else
7812# endif
7813 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7814
7815#else
7816# error "port me"
7817#endif
7818 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7819
7820 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7821}
7822
7823
7824/**
7825 * Emits a standard prolog.
7826 */
7827static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7828{
7829#ifdef RT_ARCH_AMD64
7830 /*
7831 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7832 * reserving 64 bytes for stack variables plus 4 non-register argument
7833 * slots. Fixed register assignment: xBX = pVCpu;
7834 *
7835 * Since we always do the same register spilling, we can use the same
7836 * unwind description for all the code.
7837 */
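    /* Resulting frame, from high to low addresses: return address, saved RBP, RBX,
       RSI+RDI (Windows only), R12 thru R15, then the aligned variable/stack-arg/shadow-arg area. */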
7838 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7839 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7840 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7841 pbCodeBuf[off++] = 0x8b;
7842 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7843 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7844 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7845# ifdef RT_OS_WINDOWS
7846 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7847 pbCodeBuf[off++] = 0x8b;
7848 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7849 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7850 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7851# else
7852 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7853 pbCodeBuf[off++] = 0x8b;
7854 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7855# endif
7856 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7857 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7858 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7859 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7860 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7861 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7862 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7863 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7864
7865# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7866 /* Save the frame pointer. */
7867 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7868# endif
7869
7870 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7871 X86_GREG_xSP,
7872 IEMNATIVE_FRAME_ALIGN_SIZE
7873 + IEMNATIVE_FRAME_VAR_SIZE
7874 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7875 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7876 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7877 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7878 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7879
7880#elif RT_ARCH_ARM64
7881 /*
7882 * We set up a stack frame exactly like on x86, only we have to push the
7883 * return address ourselves here. We save all non-volatile registers.
7884 */
7885 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7886
7887 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
7888 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7889 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether
7890 * it's in any way conditional, so just emit this instruction now and hope for the best... */
7891 /* pacibsp */
7892 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7893# endif
7894
7895 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
7896 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7897 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7898 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7899 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7900 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7901 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7902 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7903 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7904 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7905 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7906 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7907 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7908 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7909 /* Save the BP and LR (ret address) registers at the top of the frame. */
7910 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7911 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7912 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7913 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7914 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7915 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7916
7917 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7918 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7919
7920 /* mov r28, r0 */
7921 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7922 /* mov r27, r1 */
7923 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7924
7925# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7926 /* Save the frame pointer. */
7927 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7928 ARMV8_A64_REG_X2);
7929# endif
7930
7931#else
7932# error "port me"
7933#endif
7934 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7935 return off;
7936}
7937
7938
7939/*********************************************************************************************************************************
7940* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7941*********************************************************************************************************************************/
7942
7943/**
7944 * Internal work that allocates a variable with kind set to
7945 * kIemNativeVarKind_Invalid and no current stack allocation.
7946 *
7947 * The kind will either be set by the caller or later when the variable is first
7948 * assigned a value.
7949 *
7950 * @returns Unpacked index.
7951 * @internal
7952 */
7953static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7954{
7955 Assert(cbType > 0 && cbType <= 64);
7956 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7957 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7958 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7959 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7960 pReNative->Core.aVars[idxVar].cbVar = cbType;
7961 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7962 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7963 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7964 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7965 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7966 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7967 pReNative->Core.aVars[idxVar].u.uValue = 0;
7968#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7969 pReNative->Core.aVars[idxVar].fSimdReg = false;
7970#endif
7971 return idxVar;
7972}
7973
7974
7975/**
7976 * Internal work that allocates an argument variable w/o setting enmKind.
7977 *
7978 * @returns Unpacked index.
7979 * @internal
7980 */
7981static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7982{
7983 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7984 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7985 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7986
7987 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7988 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7989 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7990 return idxVar;
7991}
7992
7993
7994/**
7995 * Gets the stack slot for a stack variable, allocating one if necessary.
7996 *
7997 * Calling this function implies that the stack slot will contain a valid
7998 * variable value. The caller deals with any register currently assigned to the
7999 * variable, typically by spilling it into the stack slot.
8000 *
8001 * @returns The stack slot number.
8002 * @param pReNative The recompiler state.
8003 * @param idxVar The variable.
8004 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
8005 */
8006DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8007{
8008 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8009 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8010 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8011
8012 /* Already got a slot? */
8013 uint8_t const idxStackSlot = pVar->idxStackSlot;
8014 if (idxStackSlot != UINT8_MAX)
8015 {
8016 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
8017 return idxStackSlot;
8018 }
8019
8020 /*
8021 * A single slot is easy to allocate.
8022 * Allocate them from the top end, closest to BP, to reduce the displacement.
8023 */
8024 if (pVar->cbVar <= sizeof(uint64_t))
8025 {
8026 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8027 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8028 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
8029 pVar->idxStackSlot = (uint8_t)iSlot;
8030 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
8031 return (uint8_t)iSlot;
8032 }
8033
8034 /*
8035 * We need more than one stack slot.
8036 *
8037 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
8038 */
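    /* For example, a 16 byte variable takes two slots (fBitAllocMask=0x3) at an even slot index,
       while a 64 byte one takes eight slots (fBitAllocMask=0xff) at an 8-slot aligned index. */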
8039 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
8040 Assert(pVar->cbVar <= 64);
8041 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
8042 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
8043 uint32_t bmStack = pReNative->Core.bmStack;
8044 while (bmStack != UINT32_MAX)
8045 {
8046 unsigned iSlot = ASMBitLastSetU32(~bmStack);
8047 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8048 iSlot = (iSlot - 1) & ~fBitAlignMask;
8049 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
8050 {
8051 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
8052 pVar->idxStackSlot = (uint8_t)iSlot;
8053 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8054 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
8055 return (uint8_t)iSlot;
8056 }
8057
8058 bmStack |= (fBitAllocMask << iSlot);
8059 }
8060 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8061}
8062
8063
8064/**
8065 * Changes the variable to a stack variable.
8066 *
8067 * Currently this is only possible to do the first time the variable is used;
8068 * switching later can be implemented but hasn't been done.
8069 *
8070 * @param pReNative The recompiler state.
8071 * @param idxVar The variable.
8072 * @throws VERR_IEM_VAR_IPE_2
8073 */
8074DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8075{
8076 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8077 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8078 if (pVar->enmKind != kIemNativeVarKind_Stack)
8079 {
8080 /* We could in theory transition from immediate to stack as well, but it
8081 would involve the caller doing work storing the value on the stack. So,
8082 till that's required we only allow transition from invalid. */
8083 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8084 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8085 pVar->enmKind = kIemNativeVarKind_Stack;
8086
8087 /* Note! We don't allocate a stack slot here, that's only done when a
8088 slot is actually needed to hold a variable value. */
8089 }
8090}
8091
8092
8093/**
8094 * Sets it to a variable with a constant value.
8095 *
8096 * This does not require stack storage as we know the value and can always
8097 * reload it, unless of course it's referenced.
8098 *
8099 * @param pReNative The recompiler state.
8100 * @param idxVar The variable.
8101 * @param uValue The immediate value.
8102 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8103 */
8104DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
8105{
8106 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8107 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8108 if (pVar->enmKind != kIemNativeVarKind_Immediate)
8109 {
8110 /* Only simple transitions for now. */
8111 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8112 pVar->enmKind = kIemNativeVarKind_Immediate;
8113 }
8114 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8115
8116 pVar->u.uValue = uValue;
8117 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
8118 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
8119 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
8120}
8121
8122
8123/**
8124 * Sets the variable to a reference (pointer) to @a idxOtherVar.
8125 *
8126 * This does not require stack storage as we know the value and can always
8127 * reload it. Loading is postponed till needed.
8128 *
8129 * @param pReNative The recompiler state.
8130 * @param idxVar The variable. Unpacked.
8131 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
8132 *
8133 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8134 * @internal
8135 */
8136static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
8137{
8138 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
8139 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
8140
8141 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
8142 {
8143 /* Only simple transitions for now. */
8144 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
8145 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8146 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
8147 }
8148 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8149
8150 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
8151
8152 /* Update the other variable, ensure it's a stack variable. */
8153 /** @todo handle variables with const values... that'll go boom now. */
8154 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
8155 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8156}
8157
8158
8159/**
8160 * Sets the variable to a reference (pointer) to a guest register reference.
8161 *
8162 * This does not require stack storage as we know the value and can always
8163 * reload it. Loading is postponed till needed.
8164 *
8165 * @param pReNative The recompiler state.
8166 * @param idxVar The variable.
8167 * @param enmRegClass The class guest registers to reference.
8168 * @param idxReg The register within @a enmRegClass to reference.
8169 *
8170 * @throws VERR_IEM_VAR_IPE_2
8171 */
8172DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8173 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
8174{
8175 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8176 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8177
8178 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
8179 {
8180 /* Only simple transitions for now. */
8181 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8182 pVar->enmKind = kIemNativeVarKind_GstRegRef;
8183 }
8184 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8185
8186 pVar->u.GstRegRef.enmClass = enmRegClass;
8187 pVar->u.GstRegRef.idx = idxReg;
8188}
8189
8190
8191DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
8192{
8193 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8194}
8195
8196
8197DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
8198{
8199 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8200
8201 /* Since we're using a generic uint64_t value type, we must truncate it if
8202 the variable is smaller, otherwise we may end up with too large a value when
8203 scaling up an imm8 w/ sign-extension.
8204
8205 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
8206 in the bios, bx=1) when running on arm, because clang expects 16-bit
8207 register parameters to have bits 16 and up set to zero. Instead of
8208 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
8209 CF value in the result. */
8210 switch (cbType)
8211 {
8212 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8213 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8214 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8215 }
8216 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8217 return idxVar;
8218}
8219
8220
8221DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
8222{
8223 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
8224 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
8225 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
8226 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
8227 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
8228 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
8229
8230 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
8231 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
8232 return idxArgVar;
8233}
8234
8235
8236DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
8237{
8238 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8239 /* Don't set to stack now, leave that to the first use as for instance
8240 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
8241 return idxVar;
8242}
8243
8244
8245DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
8246{
8247 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8248
8249 /* Since we're using a generic uint64_t value type, we must truncate it if
8250 the variable is smaller, otherwise we may end up with too large a value when
8251 scaling up an imm8 w/ sign-extension. */
8252 switch (cbType)
8253 {
8254 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8255 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8256 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8257 }
8258 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8259 return idxVar;
8260}
8261
8262
8263DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
8264{
8265 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8266 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8267
8268 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
8269 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
8270
8271 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
8272
8273 /* Truncate the value to this variables size. */
8274 switch (cbType)
8275 {
8276 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
8277 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
8278 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
8279 }
8280
8281 iemNativeVarRegisterRelease(pReNative, idxVarOther);
8282 iemNativeVarRegisterRelease(pReNative, idxVar);
8283 return idxVar;
8284}
8285
8286
8287/**
8288 * Makes sure variable @a idxVar has a register assigned to it and that it stays
8289 * fixed till we call iemNativeVarRegisterRelease.
8290 *
8291 * @returns The host register number.
8292 * @param pReNative The recompiler state.
8293 * @param idxVar The variable.
8294 * @param poff Pointer to the instruction buffer offset.
8295 * In case a register needs to be freed up or the value
8296 * loaded off the stack.
8297 * @param fInitialized Set if the variable must already have been initialized.
8298 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8299 * the case.
8300 * @param idxRegPref Preferred register number or UINT8_MAX.
8301 */
8302DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8303 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8304{
8305 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8306 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8307 Assert(pVar->cbVar <= 8);
8308 Assert(!pVar->fRegAcquired);
8309
8310 uint8_t idxReg = pVar->idxReg;
8311 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8312 {
8313 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8314 && pVar->enmKind < kIemNativeVarKind_End);
8315 pVar->fRegAcquired = true;
8316 return idxReg;
8317 }
8318
8319 /*
8320 * If the kind of variable has not yet been set, default to 'stack'.
8321 */
8322 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8323 && pVar->enmKind < kIemNativeVarKind_End);
8324 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8325 iemNativeVarSetKindToStack(pReNative, idxVar);
8326
8327 /*
8328 * We have to allocate a register for the variable, even if it's a stack one,
8329 * as we don't know whether modifications are being made to it before it's
8330 * finalized (todo: analyze and insert hints about that?).
8331 *
8332 * If we can, we try to get the correct register for argument variables. This
8333 * is assuming that most argument variables are fetched as close as possible
8334 * to the actual call, so that there aren't any interfering hidden calls
8335 * (memory accesses, etc) in between.
8336 *
8337 * If we cannot, or if it's a local variable rather than an argument, we make
8338 * sure no argument registers that will be used by this MC block are allocated
8339 * here, and we always prefer non-volatile registers to avoid having to spill
8340 * stuff for internal calls.
8341 */
8342 /** @todo Detect too early argument value fetches and warn about hidden
8343 * calls causing less optimal code to be generated in the python script. */
8344
8345 uint8_t const uArgNo = pVar->uArgNo;
8346 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
8347 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
8348 {
8349 idxReg = g_aidxIemNativeCallRegs[uArgNo];
8350
8351#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8352 /* Writeback any dirty shadow registers we are about to unshadow. */
8353 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
8354#endif
8355
8356 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8357 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
8358 }
8359 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
8360 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
8361 {
8362         /** @todo there must be a better way for this, and for cArgsX to boot? */
8363 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8364 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
8365 & ~pReNative->Core.bmHstRegsWithGstShadow
8366 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
8367 & fNotArgsMask;
8368 if (fRegs)
8369 {
8370             /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
8371 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
8372 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
8373 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
8374 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
8375 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8376 }
8377 else
8378 {
8379 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8380 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
8381 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8382 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8383 }
8384 }
8385 else
8386 {
8387 idxReg = idxRegPref;
8388 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8389 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8390 }
8391 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8392 pVar->idxReg = idxReg;
8393
8394#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8395 pVar->fSimdReg = false;
8396#endif
8397
8398 /*
8399 * Load it off the stack if we've got a stack slot.
8400 */
8401 uint8_t const idxStackSlot = pVar->idxStackSlot;
8402 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8403 {
8404 Assert(fInitialized);
8405 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8406 switch (pVar->cbVar)
8407 {
8408 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
8409 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
8410 case 3: AssertFailed(); RT_FALL_THRU();
8411 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
8412 default: AssertFailed(); RT_FALL_THRU();
8413 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
8414 }
8415 }
8416 else
8417 {
8418 Assert(idxStackSlot == UINT8_MAX);
8419 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8420 }
8421 pVar->fRegAcquired = true;
8422 return idxReg;
8423}
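
// Illustrative usage sketch (added commentary, not from the original source):
// callers typically bracket the emitted code that uses the host register with an
// acquire/release pair; the middle emitter call is just a stand-in for whatever
// the caller generates, reusing names from the surrounding code.
//
//     uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
//     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxReg);
//     iemNativeVarRegisterRelease(pReNative, idxVar);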
8424
8425
8426#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8427/**
8428 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
8429 * fixed till we call iemNativeVarRegisterRelease.
8430 *
8431 * @returns The host register number.
8432 * @param pReNative The recompiler state.
8433 * @param idxVar The variable.
8434 * @param poff Pointer to the instruction buffer offset.
8435 * In case a register needs to be freed up or the value
8436 * loaded off the stack.
8437 * @param fInitialized Set if the variable must already have been initialized.
8438 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8439 * the case.
8440 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
8441 */
8442DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8443 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8444{
8445 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8446 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8447 Assert( pVar->cbVar == sizeof(RTUINT128U)
8448 || pVar->cbVar == sizeof(RTUINT256U));
8449 Assert(!pVar->fRegAcquired);
8450
8451 uint8_t idxReg = pVar->idxReg;
8452 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
8453 {
8454 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8455 && pVar->enmKind < kIemNativeVarKind_End);
8456 pVar->fRegAcquired = true;
8457 return idxReg;
8458 }
8459
8460 /*
8461 * If the kind of variable has not yet been set, default to 'stack'.
8462 */
8463 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8464 && pVar->enmKind < kIemNativeVarKind_End);
8465 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8466 iemNativeVarSetKindToStack(pReNative, idxVar);
8467
8468 /*
8469      * We have to allocate a register for the variable, even if it's a stack one,
8470      * as we don't know if there are modifications being made to it before it's
8471      * finalized (todo: analyze and insert hints about that?).
8472      *
8473      * If we can, we try to get the correct register for argument variables.  This
8474      * is assuming that most argument variables are fetched as close as possible
8475      * to the actual call, so that there aren't any interfering hidden calls
8476      * (memory accesses, etc) in between.
8477      *
8478      * If we cannot, or it's a regular variable, we make sure no argument registers
8479      * that will be used by this MC block will be allocated here, and we always
8480      * prefer non-volatile registers to avoid needing to spill stuff for internal
8481      * calls.
8482 */
8483 /** @todo Detect too early argument value fetches and warn about hidden
8484 * calls causing less optimal code to be generated in the python script. */
8485
8486 uint8_t const uArgNo = pVar->uArgNo;
8487 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
8488
8489     /* SIMD is a bit simpler for now because there is no support for arguments. */
8490 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
8491 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
8492 {
8493 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8494 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
8495 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
8496 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
8497 & fNotArgsMask;
8498 if (fRegs)
8499 {
8500 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
8501 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
8502 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
8503 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
8504 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8505 }
8506 else
8507 {
8508 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8509 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
8510 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8511 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8512 }
8513 }
8514 else
8515 {
8516 idxReg = idxRegPref;
8517 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8518 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8519 }
8520 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8521
8522 pVar->fSimdReg = true;
8523 pVar->idxReg = idxReg;
8524
8525 /*
8526 * Load it off the stack if we've got a stack slot.
8527 */
8528 uint8_t const idxStackSlot = pVar->idxStackSlot;
8529 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8530 {
8531 Assert(fInitialized);
8532 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8533 switch (pVar->cbVar)
8534 {
8535 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
8536 default: AssertFailed(); RT_FALL_THRU();
8537 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
8538 }
8539 }
8540 else
8541 {
8542 Assert(idxStackSlot == UINT8_MAX);
8543 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8544 }
8545 pVar->fRegAcquired = true;
8546 return idxReg;
8547}
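
// Illustrative usage sketch (added commentary): the SIMD variant follows the same
// acquire / emit / release pattern, just with a host vector register; the middle
// line stands in for whatever vector code the caller emits.
//
//     uint8_t const idxVecReg = iemNativeVarSimdRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
//     /* ... emit code that reads/writes the vector register idxVecReg ... */
//     iemNativeVarRegisterRelease(pReNative, idxVar);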
8548#endif
8549
8550
8551/**
8552 * The value of variable @a idxVar will be written in full to the @a enmGstReg
8553 * guest register.
8554 *
8555 * This function makes sure there is a register for it and sets it to be the
8556 * current shadow copy of @a enmGstReg.
8557 *
8558 * @returns The host register number.
8559 * @param pReNative The recompiler state.
8560 * @param idxVar The variable.
8561 * @param enmGstReg The guest register this variable will be written to
8562 * after this call.
8563 * @param poff Pointer to the instruction buffer offset.
8564 * In case a register needs to be freed up or if the
8565 * variable content needs to be loaded off the stack.
8566 *
8567 * @note We DO NOT expect @a idxVar to be an argument variable,
8568  *         because this function is only used in the commit stage of an
8569  *         instruction.
8570 */
8571DECL_HIDDEN_THROW(uint8_t)
8572iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
8573{
8574 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8575 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8576 Assert(!pVar->fRegAcquired);
8577 AssertMsgStmt( pVar->cbVar <= 8
8578 && ( pVar->enmKind == kIemNativeVarKind_Immediate
8579 || pVar->enmKind == kIemNativeVarKind_Stack),
8580 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
8581 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
8582 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8583
8584 /*
8585 * This shouldn't ever be used for arguments, unless it's in a weird else
8586 * branch that doesn't do any calling and even then it's questionable.
8587 *
8588 * However, in case someone writes crazy wrong MC code and does register
8589 * updates before making calls, just use the regular register allocator to
8590 * ensure we get a register suitable for the intended argument number.
8591 */
8592 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
8593
8594 /*
8595 * If there is already a register for the variable, we transfer/set the
8596 * guest shadow copy assignment to it.
8597 */
8598 uint8_t idxReg = pVar->idxReg;
8599 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8600 {
8601#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8602 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
8603 {
8604# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8605 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
8606 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
8607# endif
8608
8609 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
8610 }
8611#endif
8612
8613 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
8614 {
8615 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
8616 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
8617 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
8618 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
8619 }
8620 else
8621 {
8622 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
8623 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
8624 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
8625 }
8626 /** @todo figure this one out. We need some way of making sure the register isn't
8627 * modified after this point, just in case we start writing crappy MC code. */
8628 pVar->enmGstReg = enmGstReg;
8629 pVar->fRegAcquired = true;
8630 return idxReg;
8631 }
8632 Assert(pVar->uArgNo == UINT8_MAX);
8633
8634 /*
8635      * Because this is supposed to be the commit stage, we just tag along with the
8636 * temporary register allocator and upgrade it to a variable register.
8637 */
8638 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
8639 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
8640 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
8641 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
8642 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
8643 pVar->idxReg = idxReg;
8644
8645 /*
8646 * Now we need to load the register value.
8647 */
8648 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8649 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
8650 else
8651 {
8652 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8653 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8654 switch (pVar->cbVar)
8655 {
8656 case sizeof(uint64_t):
8657 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8658 break;
8659 case sizeof(uint32_t):
8660 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8661 break;
8662 case sizeof(uint16_t):
8663 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8664 break;
8665 case sizeof(uint8_t):
8666 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8667 break;
8668 default:
8669 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8670 }
8671 }
8672
8673 pVar->fRegAcquired = true;
8674 return idxReg;
8675}
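
// Illustrative commit-stage sketch (added commentary; idxVarValue is a hypothetical
// variable and the store emitter is left out since the exact emitter used to write
// the value back to CPUMCTX is not taken from this file):
//
//     uint8_t const idxReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarValue,
//                                                                   (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX), &off);
//     /* ... emit the store of idxReg into the guest register in CPUMCTX ... */
//     iemNativeVarRegisterRelease(pReNative, idxVarValue);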
8676
8677
8678/**
8679 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8680 *
8681 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8682 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8683 * requirement of flushing anything in volatile host registers when making a
8684 * call.
8685 *
8686 * @returns New @a off value.
8687 * @param pReNative The recompiler state.
8688 * @param off The code buffer position.
8689 * @param fHstRegsNotToSave Set of registers not to save & restore.
8690 */
8691DECL_HIDDEN_THROW(uint32_t)
8692iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8693{
8694 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8695 if (fHstRegs)
8696 {
8697 do
8698 {
8699 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8700 fHstRegs &= ~RT_BIT_32(idxHstReg);
8701
8702 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8703 {
8704 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8705 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8706 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8707 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8708 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8709 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8710 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8711 {
8712 case kIemNativeVarKind_Stack:
8713 {
8714 /* Temporarily spill the variable register. */
8715 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8716 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8717 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8718 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8719 continue;
8720 }
8721
8722 case kIemNativeVarKind_Immediate:
8723 case kIemNativeVarKind_VarRef:
8724 case kIemNativeVarKind_GstRegRef:
8725 /* It is weird to have any of these loaded at this point. */
8726 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8727 continue;
8728
8729 case kIemNativeVarKind_End:
8730 case kIemNativeVarKind_Invalid:
8731 break;
8732 }
8733 AssertFailed();
8734 }
8735 else
8736 {
8737 /*
8738 * Allocate a temporary stack slot and spill the register to it.
8739 */
8740 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8741 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8742 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8743 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8744 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8745 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8746 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8747 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8748 }
8749 } while (fHstRegs);
8750 }
8751#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8752
8753 /*
8754      * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
8755      * which would be more difficult anyway due to spanning multiple stack slots and differing sizes
8756      * (besides, we only have a limited number of slots at the moment).
8757      *
8758      * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
8759      * the callee.  This asserts that the registers were written back earlier and are not in the dirty state.
8760 */
8761 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
8762
8763 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8764 if (fHstRegs)
8765 {
8766 do
8767 {
8768 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8769 fHstRegs &= ~RT_BIT_32(idxHstReg);
8770
8771 /* Fixed reserved and temporary registers don't need saving. */
8772 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
8773 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
8774 continue;
8775
8776 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8777
8778 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8779 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8780 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8781 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8782 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8783 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8784 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8785 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8786 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8787 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8788 {
8789 case kIemNativeVarKind_Stack:
8790 {
8791 /* Temporarily spill the variable register. */
8792 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8793 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8794 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8795 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8796 if (cbVar == sizeof(RTUINT128U))
8797 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8798 else
8799 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8800 continue;
8801 }
8802
8803 case kIemNativeVarKind_Immediate:
8804 case kIemNativeVarKind_VarRef:
8805 case kIemNativeVarKind_GstRegRef:
8806 /* It is weird to have any of these loaded at this point. */
8807 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8808 continue;
8809
8810 case kIemNativeVarKind_End:
8811 case kIemNativeVarKind_Invalid:
8812 break;
8813 }
8814 AssertFailed();
8815 } while (fHstRegs);
8816 }
8817#endif
8818 return off;
8819}
8820
8821
8822/**
8823  * Emit code to restore volatile registers after a call to a helper.
8824 *
8825 * @returns New @a off value.
8826 * @param pReNative The recompiler state.
8827 * @param off The code buffer position.
8828 * @param fHstRegsNotToSave Set of registers not to save & restore.
8829 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8830 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8831 */
8832DECL_HIDDEN_THROW(uint32_t)
8833iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8834{
8835 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8836 if (fHstRegs)
8837 {
8838 do
8839 {
8840 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8841 fHstRegs &= ~RT_BIT_32(idxHstReg);
8842
8843 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8844 {
8845 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8846 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8847 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8848 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8849 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8850 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8851 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8852 {
8853 case kIemNativeVarKind_Stack:
8854 {
8855 /* Unspill the variable register. */
8856 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8857 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8858 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8859 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8860 continue;
8861 }
8862
8863 case kIemNativeVarKind_Immediate:
8864 case kIemNativeVarKind_VarRef:
8865 case kIemNativeVarKind_GstRegRef:
8866 /* It is weird to have any of these loaded at this point. */
8867 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8868 continue;
8869
8870 case kIemNativeVarKind_End:
8871 case kIemNativeVarKind_Invalid:
8872 break;
8873 }
8874 AssertFailed();
8875 }
8876 else
8877 {
8878 /*
8879 * Restore from temporary stack slot.
8880 */
8881 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8882 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8883 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8884 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8885
8886 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8887 }
8888 } while (fHstRegs);
8889 }
8890#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8891 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8892 if (fHstRegs)
8893 {
8894 do
8895 {
8896 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8897 fHstRegs &= ~RT_BIT_32(idxHstReg);
8898
8899 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8900 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8901 continue;
8902 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8903
8904 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8905 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8906 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8907 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8908 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8909 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8910 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8911 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8912 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8913 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8914 {
8915 case kIemNativeVarKind_Stack:
8916 {
8917 /* Unspill the variable register. */
8918 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8919 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8920 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8921 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8922
8923 if (cbVar == sizeof(RTUINT128U))
8924 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8925 else
8926 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8927 continue;
8928 }
8929
8930 case kIemNativeVarKind_Immediate:
8931 case kIemNativeVarKind_VarRef:
8932 case kIemNativeVarKind_GstRegRef:
8933 /* It is weird to have any of these loaded at this point. */
8934 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8935 continue;
8936
8937 case kIemNativeVarKind_End:
8938 case kIemNativeVarKind_Invalid:
8939 break;
8940 }
8941 AssertFailed();
8942 } while (fHstRegs);
8943 }
8944#endif
8945 return off;
8946}
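
// Illustrative bracket pattern (added commentary): the save/restore pair is meant
// to wrap a helper call on a TLB-miss path roughly as below; the helper-call
// emitter name and pfnHelper are assumptions, not code taken from this file.
//
//     off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
//     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
//     off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);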
8947
8948
8949/**
8950 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8951 *
8952 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8953 *
8954 * ASSUMES that @a idxVar is valid and unpacked.
8955 */
8956DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8957{
8958 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8959 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8960 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8961 {
8962 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8963 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8964 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
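        /* E.g. a 32 byte RTUINT256U variable: cSlots = 4 and fAllocMask = 0xf, so the
           four consecutive 8-byte slots starting at idxStackSlot get freed below. */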
8965 Assert(cSlots > 0);
8966 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8967 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8968 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8969 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8970 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8971 }
8972 else
8973 Assert(idxStackSlot == UINT8_MAX);
8974}
8975
8976
8977/**
8978 * Worker that frees a single variable.
8979 *
8980 * ASSUMES that @a idxVar is valid and unpacked.
8981 */
8982DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8983{
8984 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8985 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8986 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8987
8988 /* Free the host register first if any assigned. */
8989 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8990#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8991 if ( idxHstReg != UINT8_MAX
8992 && pReNative->Core.aVars[idxVar].fSimdReg)
8993 {
8994 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8995 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8996 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8997 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8998 }
8999 else
9000#endif
9001 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9002 {
9003 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9004 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9005 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9006 }
9007
9008 /* Free argument mapping. */
9009 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
9010 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
9011 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
9012
9013 /* Free the stack slots. */
9014 iemNativeVarFreeStackSlots(pReNative, idxVar);
9015
9016 /* Free the actual variable. */
9017 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9018 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9019}
9020
9021
9022/**
9023 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
9024 */
9025DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
9026{
9027 while (bmVars != 0)
9028 {
9029 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9030 bmVars &= ~RT_BIT_32(idxVar);
9031
9032#if 1 /** @todo optimize by simplifying this later... */
9033 iemNativeVarFreeOneWorker(pReNative, idxVar);
9034#else
9035 /* Only need to free the host register, the rest is done as bulk updates below. */
9036 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9037 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9038 {
9039 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9040 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9041 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9042 }
9043#endif
9044 }
9045#if 0 /** @todo optimize by simplifying this later... */
9046 pReNative->Core.bmVars = 0;
9047 pReNative->Core.bmStack = 0;
9048 pReNative->Core.u64ArgVars = UINT64_MAX;
9049#endif
9050}
9051
9052
9053
9054/*********************************************************************************************************************************
9055* Emitters for IEM_MC_CALL_CIMPL_XXX *
9056*********************************************************************************************************************************/
9057
9058/**
9059 * Emits code to load a reference to the given guest register into @a idxGprDst.
9060 */
9061DECL_HIDDEN_THROW(uint32_t)
9062iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
9063 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
9064{
9065#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9066     /** @todo If we're ever gonna allow referencing the RIP register, we need to update the guest value here. */
9067#endif
9068
9069 /*
9070 * Get the offset relative to the CPUMCTX structure.
9071 */
9072 uint32_t offCpumCtx;
9073 switch (enmClass)
9074 {
9075 case kIemNativeGstRegRef_Gpr:
9076 Assert(idxRegInClass < 16);
9077 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
9078 break;
9079
9080         case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
9081 Assert(idxRegInClass < 4);
9082 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
9083 break;
9084
9085 case kIemNativeGstRegRef_EFlags:
9086 Assert(idxRegInClass == 0);
9087 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
9088 break;
9089
9090 case kIemNativeGstRegRef_MxCsr:
9091 Assert(idxRegInClass == 0);
9092 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
9093 break;
9094
9095 case kIemNativeGstRegRef_FpuReg:
9096 Assert(idxRegInClass < 8);
9097 AssertFailed(); /** @todo what kind of indexing? */
9098 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
9099 break;
9100
9101 case kIemNativeGstRegRef_MReg:
9102 Assert(idxRegInClass < 8);
9103 AssertFailed(); /** @todo what kind of indexing? */
9104 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
9105 break;
9106
9107 case kIemNativeGstRegRef_XReg:
9108 Assert(idxRegInClass < 16);
9109 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
9110 break;
9111
9112 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
9113 Assert(idxRegInClass == 0);
9114 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
9115 break;
9116
9117 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
9118 Assert(idxRegInClass == 0);
9119 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
9120 break;
9121
9122 default:
9123 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
9124 }
9125
9126 /*
9127 * Load the value into the destination register.
9128 */
9129#ifdef RT_ARCH_AMD64
9130 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
9131
9132#elif defined(RT_ARCH_ARM64)
9133 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9134 Assert(offCpumCtx < 4096);
9135 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
9136
9137#else
9138# error "Port me!"
9139#endif
9140
9141 return off;
9142}
9143
9144
9145/**
9146 * Common code for CIMPL and AIMPL calls.
9147 *
9148  * These are calls that use argument variables and such.  They should not be
9149 * confused with internal calls required to implement an MC operation,
9150 * like a TLB load and similar.
9151 *
9152 * Upon return all that is left to do is to load any hidden arguments and
9153 * perform the call. All argument variables are freed.
9154 *
9155 * @returns New code buffer offset; throws VBox status code on error.
9156 * @param pReNative The native recompile state.
9157 * @param off The code buffer offset.
9158  * @param   cArgs           The total number of arguments (includes hidden
9159 * count).
9160 * @param cHiddenArgs The number of hidden arguments. The hidden
9161 * arguments must not have any variable declared for
9162 * them, whereas all the regular arguments must
9163 * (tstIEMCheckMc ensures this).
9164  * @param   fFlushPendingWrites Whether to flush pending writes (default true);
9165  *                          pending writes in call-volatile registers are still flushed even if false.
9166 */
9167DECL_HIDDEN_THROW(uint32_t)
9168iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs, bool fFlushPendingWrites /*= true*/)
9169{
9170#ifdef VBOX_STRICT
9171 /*
9172 * Assert sanity.
9173 */
9174 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
9175 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
9176 for (unsigned i = 0; i < cHiddenArgs; i++)
9177 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
9178 for (unsigned i = cHiddenArgs; i < cArgs; i++)
9179 {
9180 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
9181 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
9182 }
9183 iemNativeRegAssertSanity(pReNative);
9184#endif
9185
9186 /* We don't know what the called function makes use of, so flush any pending register writes. */
9187#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9188 if (fFlushPendingWrites)
9189#endif
9190 off = iemNativeRegFlushPendingWrites(pReNative, off);
9191
9192 /*
9193 * Before we do anything else, go over variables that are referenced and
9194 * make sure they are not in a register.
9195 */
9196 uint32_t bmVars = pReNative->Core.bmVars;
9197 if (bmVars)
9198 {
9199 do
9200 {
9201 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9202 bmVars &= ~RT_BIT_32(idxVar);
9203
9204 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
9205 {
9206 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
9207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9208 if ( idxRegOld != UINT8_MAX
9209 && pReNative->Core.aVars[idxVar].fSimdReg)
9210 {
9211 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9212 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
9213
9214 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
9215 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
9216 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
9217 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9218 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
9219 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9220 else
9221 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9222
9223 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
9224 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
9225
9226 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9227 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
9228 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9229 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
9230 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
9231 }
9232 else
9233#endif
9234 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
9235 {
9236 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
9237 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
9238 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
9239 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9240 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9241
9242 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9243 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
9244 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9245 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
9246 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
9247 }
9248 }
9249 } while (bmVars != 0);
9250#if 0 //def VBOX_STRICT
9251 iemNativeRegAssertSanity(pReNative);
9252#endif
9253 }
9254
9255 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
9256
9257#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9258 /*
9259      * As the very first step, go over the host registers that will be used for arguments
9260      * and make sure they don't shadow anything which needs writing back first.
9261 */
9262 for (uint32_t i = 0; i < cRegArgs; i++)
9263 {
9264 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9265
9266 /* Writeback any dirty guest shadows before using this register. */
9267 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
9268 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
9269 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
9270 }
9271#endif
9272
9273 /*
9274 * First, go over the host registers that will be used for arguments and make
9275 * sure they either hold the desired argument or are free.
9276 */
9277 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
9278 {
9279 for (uint32_t i = 0; i < cRegArgs; i++)
9280 {
9281 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9282 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9283 {
9284 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
9285 {
9286 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
9287 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9288 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9289 Assert(pVar->idxReg == idxArgReg);
9290 uint8_t const uArgNo = pVar->uArgNo;
9291 if (uArgNo == i)
9292                     { /* perfect */ }
9293 /* The variable allocator logic should make sure this is impossible,
9294 except for when the return register is used as a parameter (ARM,
9295 but not x86). */
9296#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
9297 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
9298 {
9299# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9300# error "Implement this"
9301# endif
9302 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
9303 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
9304 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
9305 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9306 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
9307 }
9308#endif
9309 else
9310 {
9311 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9312
9313 if (pVar->enmKind == kIemNativeVarKind_Stack)
9314 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
9315 else
9316 {
9317 /* just free it, can be reloaded if used again */
9318 pVar->idxReg = UINT8_MAX;
9319 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
9320 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
9321 }
9322 }
9323 }
9324 else
9325 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
9326 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
9327 }
9328 }
9329#if 0 //def VBOX_STRICT
9330 iemNativeRegAssertSanity(pReNative);
9331#endif
9332 }
9333
9334 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
9335
9336#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9337 /*
9338 * If there are any stack arguments, make sure they are in their place as well.
9339 *
9340      * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
9341      * the caller) will be loading it later and it must be free (see first loop).
9342 */
9343 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
9344 {
9345 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
9346 {
9347 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9348 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
9349 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9350 {
9351 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
9352 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
9353 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
9354 pVar->idxReg = UINT8_MAX;
9355 }
9356 else
9357 {
9358 /* Use ARG0 as temp for stuff we need registers for. */
9359 switch (pVar->enmKind)
9360 {
9361 case kIemNativeVarKind_Stack:
9362 {
9363 uint8_t const idxStackSlot = pVar->idxStackSlot;
9364 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9365 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
9366 iemNativeStackCalcBpDisp(idxStackSlot));
9367 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9368 continue;
9369 }
9370
9371 case kIemNativeVarKind_Immediate:
9372 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
9373 continue;
9374
9375 case kIemNativeVarKind_VarRef:
9376 {
9377 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9378 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9379 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9380 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9381 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9382# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9383 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
9384 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
9385 if ( fSimdReg
9386 && idxRegOther != UINT8_MAX)
9387 {
9388 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9389 if (cbVar == sizeof(RTUINT128U))
9390 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
9391 else
9392 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
9393 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9394 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9395 }
9396 else
9397# endif
9398 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9399 {
9400 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9401 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9402 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9403 }
9404 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9405 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9406 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
9407 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9408 continue;
9409 }
9410
9411 case kIemNativeVarKind_GstRegRef:
9412 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
9413 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9414 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9415 continue;
9416
9417 case kIemNativeVarKind_Invalid:
9418 case kIemNativeVarKind_End:
9419 break;
9420 }
9421 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9422 }
9423 }
9424# if 0 //def VBOX_STRICT
9425 iemNativeRegAssertSanity(pReNative);
9426# endif
9427 }
9428#else
9429 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
9430#endif
9431
9432 /*
9433 * Make sure the argument variables are loaded into their respective registers.
9434 *
9435 * We can optimize this by ASSUMING that any register allocations are for
9436      * registers that have already been loaded and are ready.  The previous step
9437 * saw to that.
9438 */
9439 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
9440 {
9441 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9442 {
9443 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9444 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9445 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
9446 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
9447 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
9448 else
9449 {
9450 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9451 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9452 {
9453 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9454 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
9455 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
9456 | RT_BIT_32(idxArgReg);
9457 pVar->idxReg = idxArgReg;
9458 }
9459 else
9460 {
9461 /* Use ARG0 as temp for stuff we need registers for. */
9462 switch (pVar->enmKind)
9463 {
9464 case kIemNativeVarKind_Stack:
9465 {
9466 uint8_t const idxStackSlot = pVar->idxStackSlot;
9467 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9468 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
9469 continue;
9470 }
9471
9472 case kIemNativeVarKind_Immediate:
9473 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
9474 continue;
9475
9476 case kIemNativeVarKind_VarRef:
9477 {
9478 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9479 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9480 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
9481 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9482 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9483 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9484#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9485 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
9486 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
9487 if ( fSimdReg
9488 && idxRegOther != UINT8_MAX)
9489 {
9490 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9491 if (cbVar == sizeof(RTUINT128U))
9492 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
9493 else
9494 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
9495 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9496 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9497 }
9498 else
9499#endif
9500 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9501 {
9502 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9503 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9504 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9505 }
9506 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9507 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9508 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
9509 continue;
9510 }
9511
9512 case kIemNativeVarKind_GstRegRef:
9513 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
9514 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9515 continue;
9516
9517 case kIemNativeVarKind_Invalid:
9518 case kIemNativeVarKind_End:
9519 break;
9520 }
9521 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9522 }
9523 }
9524 }
9525#if 0 //def VBOX_STRICT
9526 iemNativeRegAssertSanity(pReNative);
9527#endif
9528 }
9529#ifdef VBOX_STRICT
9530 else
9531 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9532 {
9533 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
9534 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
9535 }
9536#endif
9537
9538 /*
9539 * Free all argument variables (simplified).
9540 * Their lifetime always expires with the call they are for.
9541 */
9542 /** @todo Make the python script check that arguments aren't used after
9543 * IEM_MC_CALL_XXXX. */
9544     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
9545      *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
9546 * an argument value. There is also some FPU stuff. */
9547 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
9548 {
9549 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
9550 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
9551
9552 /* no need to free registers: */
9553 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
9554 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
9555 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
9556 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
9557 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
9558 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
9559
9560 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
9561 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9562 iemNativeVarFreeStackSlots(pReNative, idxVar);
9563 }
9564 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9565
9566 /*
9567 * Flush volatile registers as we make the call.
9568 */
9569 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
9570
9571 return off;
9572}
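
// Illustrative call sequence (added commentary; the hidden-argument loading and the
// call emitter shown are assumptions about how a CIMPL wrapper ties this together,
// not code taken from this file):
//
//     off = iemNativeEmitCallCommon(pReNative, off, cArgsTotal /* incl. hidden */, cHiddenArgs);
//     off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
//     off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);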
9573
9574
9575
9576/*********************************************************************************************************************************
9577* TLB Lookup. *
9578*********************************************************************************************************************************/
9579
9580/**
9581 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
9582 */
9583DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
9584{
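    /* uSegAndSizeAndAccess packs iSegReg in byte 0, cbMem in byte 1 and the
       IEM_ACCESS_XXX flags in the upper 16 bits, as decoded just below. */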
9585 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
9586 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
9587 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
9588 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
9589
9590 /* Do the lookup manually. */
9591 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
9592 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
9593 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
9594 if (RT_LIKELY(pTlbe->uTag == uTag))
9595 {
9596 /*
9597 * Check TLB page table level access flags.
9598 */
9599 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
9600 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
9601 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
9602 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
9603 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
9604 | IEMTLBE_F_PG_UNASSIGNED
9605 | IEMTLBE_F_PT_NO_ACCESSED
9606 | fNoWriteNoDirty | fNoUser);
9607 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
9608 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
9609 {
9610 /*
9611 * Return the address.
9612 */
9613 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
9614 if ((uintptr_t)pbAddr == uResult)
9615 return;
9616 RT_NOREF(cbMem);
9617 AssertFailed();
9618 }
9619 else
9620 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
9621 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
9622 }
9623 else
9624 AssertFailed();
9625 RT_BREAKPOINT();
9626}
9627
9628/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
9629
9630
9631
9632/*********************************************************************************************************************************
9633* Recompiler Core. *
9634*********************************************************************************************************************************/
9635
9636/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
9637static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
9638{
9639 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
9640 pDis->cbCachedInstr += cbMaxRead;
9641 RT_NOREF(cbMinRead);
9642 return VERR_NO_DATA;
9643}
9644
9645
9646DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
9647{
9648 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
9649 {
9650#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
9651 ENTRY(fLocalForcedActions),
9652 ENTRY(iem.s.rcPassUp),
9653 ENTRY(iem.s.fExec),
9654 ENTRY(iem.s.pbInstrBuf),
9655 ENTRY(iem.s.uInstrBufPc),
9656 ENTRY(iem.s.GCPhysInstrBuf),
9657 ENTRY(iem.s.cbInstrBufTotal),
9658 ENTRY(iem.s.idxTbCurInstr),
9659#ifdef VBOX_WITH_STATISTICS
9660 ENTRY(iem.s.StatNativeTlbHitsForFetch),
9661 ENTRY(iem.s.StatNativeTlbHitsForStore),
9662 ENTRY(iem.s.StatNativeTlbHitsForStack),
9663 ENTRY(iem.s.StatNativeTlbHitsForMapped),
9664 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
9665 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
9666 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
9667 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
9668#endif
9669 ENTRY(iem.s.DataTlb.aEntries),
9670 ENTRY(iem.s.DataTlb.uTlbRevision),
9671 ENTRY(iem.s.DataTlb.uTlbPhysRev),
9672 ENTRY(iem.s.DataTlb.cTlbHits),
9673 ENTRY(iem.s.CodeTlb.aEntries),
9674 ENTRY(iem.s.CodeTlb.uTlbRevision),
9675 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
9676 ENTRY(iem.s.CodeTlb.cTlbHits),
9677 ENTRY(pVMR3),
9678 ENTRY(cpum.GstCtx.rax),
9679 ENTRY(cpum.GstCtx.ah),
9680 ENTRY(cpum.GstCtx.rcx),
9681 ENTRY(cpum.GstCtx.ch),
9682 ENTRY(cpum.GstCtx.rdx),
9683 ENTRY(cpum.GstCtx.dh),
9684 ENTRY(cpum.GstCtx.rbx),
9685 ENTRY(cpum.GstCtx.bh),
9686 ENTRY(cpum.GstCtx.rsp),
9687 ENTRY(cpum.GstCtx.rbp),
9688 ENTRY(cpum.GstCtx.rsi),
9689 ENTRY(cpum.GstCtx.rdi),
9690 ENTRY(cpum.GstCtx.r8),
9691 ENTRY(cpum.GstCtx.r9),
9692 ENTRY(cpum.GstCtx.r10),
9693 ENTRY(cpum.GstCtx.r11),
9694 ENTRY(cpum.GstCtx.r12),
9695 ENTRY(cpum.GstCtx.r13),
9696 ENTRY(cpum.GstCtx.r14),
9697 ENTRY(cpum.GstCtx.r15),
9698 ENTRY(cpum.GstCtx.es.Sel),
9699 ENTRY(cpum.GstCtx.es.u64Base),
9700 ENTRY(cpum.GstCtx.es.u32Limit),
9701 ENTRY(cpum.GstCtx.es.Attr),
9702 ENTRY(cpum.GstCtx.cs.Sel),
9703 ENTRY(cpum.GstCtx.cs.u64Base),
9704 ENTRY(cpum.GstCtx.cs.u32Limit),
9705 ENTRY(cpum.GstCtx.cs.Attr),
9706 ENTRY(cpum.GstCtx.ss.Sel),
9707 ENTRY(cpum.GstCtx.ss.u64Base),
9708 ENTRY(cpum.GstCtx.ss.u32Limit),
9709 ENTRY(cpum.GstCtx.ss.Attr),
9710 ENTRY(cpum.GstCtx.ds.Sel),
9711 ENTRY(cpum.GstCtx.ds.u64Base),
9712 ENTRY(cpum.GstCtx.ds.u32Limit),
9713 ENTRY(cpum.GstCtx.ds.Attr),
9714 ENTRY(cpum.GstCtx.fs.Sel),
9715 ENTRY(cpum.GstCtx.fs.u64Base),
9716 ENTRY(cpum.GstCtx.fs.u32Limit),
9717 ENTRY(cpum.GstCtx.fs.Attr),
9718 ENTRY(cpum.GstCtx.gs.Sel),
9719 ENTRY(cpum.GstCtx.gs.u64Base),
9720 ENTRY(cpum.GstCtx.gs.u32Limit),
9721 ENTRY(cpum.GstCtx.gs.Attr),
9722 ENTRY(cpum.GstCtx.rip),
9723 ENTRY(cpum.GstCtx.eflags),
9724 ENTRY(cpum.GstCtx.uRipInhibitInt),
9725 ENTRY(cpum.GstCtx.cr0),
9726 ENTRY(cpum.GstCtx.cr4),
9727 ENTRY(cpum.GstCtx.aXcr[0]),
9728 ENTRY(cpum.GstCtx.aXcr[1]),
9729#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9730 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
9731 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
9732 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
9733 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
9734 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
9735 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
9736 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
9737 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
9738 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
9739 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
9740 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
9741 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
9742 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
9743 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
9744 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
9745 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
9746 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
9747 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
9748 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
9749 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
9750 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
9751 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
9752 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
9753 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
9754 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
9755 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
9756 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
9757 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
9758 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
9759 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
9760 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
9761 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
9762#endif
9763#undef ENTRY
9764 };
9765#ifdef VBOX_STRICT
9766 static bool s_fOrderChecked = false;
9767 if (!s_fOrderChecked)
9768 {
9769 s_fOrderChecked = true;
9770 uint32_t offPrev = s_aMembers[0].off;
9771 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
9772 {
9773 Assert(s_aMembers[i].off > offPrev);
9774 offPrev = s_aMembers[i].off;
9775 }
9776 }
9777#endif
9778
9779 /*
9780 * Binary lookup.
9781 */
9782 unsigned iStart = 0;
9783 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9784 for (;;)
9785 {
9786 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9787 uint32_t const offCur = s_aMembers[iCur].off;
9788 if (off < offCur)
9789 {
9790 if (iCur != iStart)
9791 iEnd = iCur;
9792 else
9793 break;
9794 }
9795 else if (off > offCur)
9796 {
9797 if (iCur + 1 < iEnd)
9798 iStart = iCur + 1;
9799 else
9800 break;
9801 }
9802 else
9803 return s_aMembers[iCur].pszName;
9804 }
9805#ifdef VBOX_WITH_STATISTICS
9806 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9807 return "iem.s.acThreadedFuncStats[iFn]";
9808#endif
9809 return NULL;
9810}
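
/*
 * A small usage sketch for the lookup above (illustration only, hypothetical
 * helper name): given the displacement of a [pVCpu + disp] operand, it yields
 * the member name used to annotate the disassembly, or NULL when the offset is
 * not in the table.
 */
#if 0 /* illustration only */
static void exampleAnnotateVCpuOffset(void)
{
    const char *pszName = iemNativeDbgVCpuOffsetToName((uint32_t)RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
    /* pszName now points to "cpum.GstCtx.rip"; unknown offsets return NULL and simply go unannotated. */
    RT_NOREF(pszName);
}
#endif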
9811
9812
9813DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9814{
9815 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9816#if defined(RT_ARCH_AMD64)
9817 static const char * const a_apszMarkers[] =
9818 {
9819 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9820 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9821 };
9822#endif
9823
9824 char szDisBuf[512];
9825 DISSTATE Dis;
9826 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9827 uint32_t const cNative = pTb->Native.cInstructions;
9828 uint32_t offNative = 0;
9829#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9830 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9831#endif
9832 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9833 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9834 : DISCPUMODE_64BIT;
9835#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9836 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9837#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9838 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9839#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9840# error "Port me"
9841#else
9842 csh hDisasm = ~(size_t)0;
9843# if defined(RT_ARCH_AMD64)
9844 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9845# elif defined(RT_ARCH_ARM64)
9846 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9847# else
9848# error "Port me"
9849# endif
9850 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9851
9852 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9853 //Assert(rcCs == CS_ERR_OK);
9854#endif
9855
9856 /*
9857 * Print TB info.
9858 */
9859 pHlp->pfnPrintf(pHlp,
9860 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9861 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9862 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9863 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9864#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9865 if (pDbgInfo && pDbgInfo->cEntries > 1)
9866 {
9867 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9868
9869 /*
9870 * This disassembly is driven by the debug info which follows the native
9871 * code and indicates where the next guest instruction starts, where
9872 * labels are and such things.
9873 */
9874 uint32_t idxThreadedCall = 0;
9875 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9876 uint8_t idxRange = UINT8_MAX;
9877 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9878 uint32_t offRange = 0;
9879 uint32_t offOpcodes = 0;
9880 uint32_t const cbOpcodes = pTb->cbOpcodes;
9881 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9882 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9883 uint32_t iDbgEntry = 1;
9884 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9885
9886 while (offNative < cNative)
9887 {
9888 /* If we're at or have passed the point where the next chunk of debug
9889 info starts, process it. */
9890 if (offDbgNativeNext <= offNative)
9891 {
9892 offDbgNativeNext = UINT32_MAX;
9893 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9894 {
9895 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9896 {
9897 case kIemTbDbgEntryType_GuestInstruction:
9898 {
9899 /* Did the exec flag change? */
9900 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9901 {
9902 pHlp->pfnPrintf(pHlp,
9903 " fExec change %#08x -> %#08x %s\n",
9904 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9905 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9906 szDisBuf, sizeof(szDisBuf)));
9907 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9908 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9909 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9910 : DISCPUMODE_64BIT;
9911 }
9912
9913 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9914 where the compilation was aborted before the opcode was recorded and the actual
9915 instruction was translated to a threaded call. This may happen when we run out
9916 of ranges, or when some complicated interrupts/FFs are found to be pending or
9917 similar. So, we just deal with it here rather than in the compiler code as it
9918 is a lot simpler to do here. */
9919 if ( idxRange == UINT8_MAX
9920 || idxRange >= cRanges
9921 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9922 {
9923 idxRange += 1;
9924 if (idxRange < cRanges)
9925 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9926 else
9927 continue;
9928 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9929 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9930 + (pTb->aRanges[idxRange].idxPhysPage == 0
9931 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9932 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9933 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9934 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9935 pTb->aRanges[idxRange].idxPhysPage);
9936 GCPhysPc += offRange;
9937 }
9938
9939 /* Disassemble the instruction. */
9940 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9941 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9942 uint32_t cbInstr = 1;
9943 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9944 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9945 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9946 if (RT_SUCCESS(rc))
9947 {
9948 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9949 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9950 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9951 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9952
9953 static unsigned const s_offMarker = 55;
9954 static char const s_szMarker[] = " ; <--- guest";
9955 if (cch < s_offMarker)
9956 {
9957 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9958 cch = s_offMarker;
9959 }
9960 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9961 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9962
9963 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9964 }
9965 else
9966 {
9967 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9968 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9969 cbInstr = 1;
9970 }
9971 GCPhysPc += cbInstr;
9972 offOpcodes += cbInstr;
9973 offRange += cbInstr;
9974 continue;
9975 }
9976
9977 case kIemTbDbgEntryType_ThreadedCall:
9978 pHlp->pfnPrintf(pHlp,
9979 " Call #%u to %s (%u args) - %s\n",
9980 idxThreadedCall,
9981 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9982 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9983 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9984 idxThreadedCall++;
9985 continue;
9986
9987 case kIemTbDbgEntryType_GuestRegShadowing:
9988 {
9989 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9990 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9991 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9992 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9993 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9994 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9995 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9996 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9997 else
9998 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9999 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
10000 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
10001 continue;
10002 }
10003
10004#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
10005 case kIemTbDbgEntryType_GuestSimdRegShadowing:
10006 {
10007 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
10008 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
10009 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
10010 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
10011 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
10012 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
10013 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
10014 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
10015 else
10016 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
10017 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
10018 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
10019 continue;
10020 }
10021#endif
10022
10023 case kIemTbDbgEntryType_Label:
10024 {
10025 const char *pszName = "what_the_fudge";
10026 const char *pszComment = "";
10027 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
10028 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
10029 {
10030 case kIemNativeLabelType_Return: pszName = "Return"; break;
10031 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
10032 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
10033 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
10034 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
10035 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
10036 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
10037 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
10038 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
10039 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
10040 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
10041 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
10042 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
10043 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
10044 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
10045 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
10046 case kIemNativeLabelType_If:
10047 pszName = "If";
10048 fNumbered = true;
10049 break;
10050 case kIemNativeLabelType_Else:
10051 pszName = "Else";
10052 fNumbered = true;
10053 pszComment = " ; regs state restored pre-if-block";
10054 break;
10055 case kIemNativeLabelType_Endif:
10056 pszName = "Endif";
10057 fNumbered = true;
10058 break;
10059 case kIemNativeLabelType_CheckIrq:
10060 pszName = "CheckIrq_CheckVM";
10061 fNumbered = true;
10062 break;
10063 case kIemNativeLabelType_TlbLookup:
10064 pszName = "TlbLookup";
10065 fNumbered = true;
10066 break;
10067 case kIemNativeLabelType_TlbMiss:
10068 pszName = "TlbMiss";
10069 fNumbered = true;
10070 break;
10071 case kIemNativeLabelType_TlbDone:
10072 pszName = "TlbDone";
10073 fNumbered = true;
10074 break;
10075 case kIemNativeLabelType_Invalid:
10076 case kIemNativeLabelType_End:
10077 break;
10078 }
10079 if (fNumbered)
10080 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
10081 else
10082 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
10083 continue;
10084 }
10085
10086 case kIemTbDbgEntryType_NativeOffset:
10087 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
10088 Assert(offDbgNativeNext > offNative);
10089 break;
10090
10091#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
10092 case kIemTbDbgEntryType_DelayedPcUpdate:
10093 pHlp->pfnPrintf(pHlp,
10094 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
10095 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
10096 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
10097 continue;
10098#endif
10099
10100#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10101 case kIemTbDbgEntryType_GuestRegDirty:
10102 {
10103 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
10104 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
10105 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
10106 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
10107 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
10108 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
10109 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
10110 pHlp->pfnPrintf(pHlp,
10111 " Guest register %s (shadowed by %s) is now dirty\n",
10112 pszGstReg, pszHstReg);
10113 continue;
10114 }
10115
10116 case kIemTbDbgEntryType_GuestRegWriteback:
10117 pHlp->pfnPrintf(pHlp,
10118 " Writing dirty %s registers (gst %#RX64)\n",
10119 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
10120 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg);
10121 continue;
10122#endif
10123
10124 default:
10125 AssertFailed();
10126 }
10127 iDbgEntry++;
10128 break;
10129 }
10130 }
10131
10132 /*
10133 * Disassemble the next native instruction.
10134 */
10135 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10136# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10137 uint32_t cbInstr = sizeof(paNative[0]);
10138 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10139 if (RT_SUCCESS(rc))
10140 {
10141# if defined(RT_ARCH_AMD64)
10142 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10143 {
10144 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10145 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10146 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
10147 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10148 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10149 uInfo & 0x8000 ? "recompiled" : "todo");
10150 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
10151 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
10152 else
10153 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10154 }
10155 else
10156# endif
10157 {
10158 const char *pszAnnotation = NULL;
10159# ifdef RT_ARCH_AMD64
10160 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10161 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10162 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10163 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10164 PCDISOPPARAM pMemOp;
10165 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
10166 pMemOp = &Dis.Param1;
10167 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
10168 pMemOp = &Dis.Param2;
10169 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
10170 pMemOp = &Dis.Param3;
10171 else
10172 pMemOp = NULL;
10173 if ( pMemOp
10174 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
10175 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
10176 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
10177 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
10178
10179#elif defined(RT_ARCH_ARM64)
10180 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10181 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10182 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10183# else
10184# error "Port me"
10185# endif
10186 if (pszAnnotation)
10187 {
10188 static unsigned const s_offAnnotation = 55;
10189 size_t const cchAnnotation = strlen(pszAnnotation);
10190 size_t cchDis = strlen(szDisBuf);
10191 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
10192 {
10193 if (cchDis < s_offAnnotation)
10194 {
10195 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
10196 cchDis = s_offAnnotation;
10197 }
10198 szDisBuf[cchDis++] = ' ';
10199 szDisBuf[cchDis++] = ';';
10200 szDisBuf[cchDis++] = ' ';
10201 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
10202 }
10203 }
10204 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10205 }
10206 }
10207 else
10208 {
10209# if defined(RT_ARCH_AMD64)
10210 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10211 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10212# elif defined(RT_ARCH_ARM64)
10213 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10214# else
10215# error "Port me"
10216# endif
10217 cbInstr = sizeof(paNative[0]);
10218 }
10219 offNative += cbInstr / sizeof(paNative[0]);
10220
10221# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10222 cs_insn *pInstr;
10223 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10224 (uintptr_t)pNativeCur, 1, &pInstr);
10225 if (cInstrs > 0)
10226 {
10227 Assert(cInstrs == 1);
10228 const char *pszAnnotation = NULL;
10229# if defined(RT_ARCH_ARM64)
10230 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
10231 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
10232 {
10233 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
10234 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
10235 char *psz = strchr(pInstr->op_str, '[');
10236 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
10237 {
10238 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
10239 int32_t off = -1;
10240 psz += 4;
10241 if (*psz == ']')
10242 off = 0;
10243 else if (*psz == ',')
10244 {
10245 psz = RTStrStripL(psz + 1);
10246 if (*psz == '#')
10247 off = RTStrToInt32(&psz[1]);
10248 /** @todo deal with index registers and LSL as well... */
10249 }
10250 if (off >= 0)
10251 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
10252 }
10253 }
10254# endif
10255
10256 size_t const cchOp = strlen(pInstr->op_str);
10257# if defined(RT_ARCH_AMD64)
10258 if (pszAnnotation)
10259 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
10260 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
10261 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
10262 else
10263 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10264 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10265
10266# else
10267 if (pszAnnotation)
10268 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
10269 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
10270 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
10271 else
10272 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10273 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10274# endif
10275 offNative += pInstr->size / sizeof(*pNativeCur);
10276 cs_free(pInstr, cInstrs);
10277 }
10278 else
10279 {
10280# if defined(RT_ARCH_AMD64)
10281 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
10282 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
10283# else
10284 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10285# endif
10286 offNative++;
10287 }
10288# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10289 }
10290 }
10291 else
10292#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
10293 {
10294 /*
10295 * No debug info, just disassemble the x86 code and then the native code.
10296 *
10297 * First the guest code:
10298 */
10299 for (unsigned i = 0; i < pTb->cRanges; i++)
10300 {
10301 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
10302 + (pTb->aRanges[i].idxPhysPage == 0
10303 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
10304 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
10305 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
10306 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
10307 unsigned off = pTb->aRanges[i].offOpcodes;
10308 /** @todo this ain't working when crossing pages! */
10309 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
10310 while (off < cbOpcodes)
10311 {
10312 uint32_t cbInstr = 1;
10313 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
10314 &pTb->pabOpcodes[off], cbOpcodes - off,
10315 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
10316 if (RT_SUCCESS(rc))
10317 {
10318 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10319 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10320 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10321 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10322 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
10323 GCPhysPc += cbInstr;
10324 off += cbInstr;
10325 }
10326 else
10327 {
10328 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
10329 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
10330 break;
10331 }
10332 }
10333 }
10334
10335 /*
10336 * Then the native code:
10337 */
10338 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
10339 while (offNative < cNative)
10340 {
10341 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10342# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10343 uint32_t cbInstr = sizeof(paNative[0]);
10344 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10345 if (RT_SUCCESS(rc))
10346 {
10347# if defined(RT_ARCH_AMD64)
10348 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10349 {
10350 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10351 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10352 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
10353 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10354 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10355 uInfo & 0x8000 ? "recompiled" : "todo");
10356 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
10357 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
10358 else
10359 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10360 }
10361 else
10362# endif
10363 {
10364# ifdef RT_ARCH_AMD64
10365 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10366 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10367 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10368 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10369# elif defined(RT_ARCH_ARM64)
10370 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10371 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10372 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10373# else
10374# error "Port me"
10375# endif
10376 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10377 }
10378 }
10379 else
10380 {
10381# if defined(RT_ARCH_AMD64)
10382 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10383 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10384# else
10385 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10386# endif
10387 cbInstr = sizeof(paNative[0]);
10388 }
10389 offNative += cbInstr / sizeof(paNative[0]);
10390
10391# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10392 cs_insn *pInstr;
10393 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10394 (uintptr_t)pNativeCur, 1, &pInstr);
10395 if (cInstrs > 0)
10396 {
10397 Assert(cInstrs == 1);
10398# if defined(RT_ARCH_AMD64)
10399 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10400 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10401# else
10402 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10403 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10404# endif
10405 offNative += pInstr->size / sizeof(*pNativeCur);
10406 cs_free(pInstr, cInstrs);
10407 }
10408 else
10409 {
10410# if defined(RT_ARCH_AMD64)
10411 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
10412 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
10413# else
10414 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10415# endif
10416 offNative++;
10417 }
10418# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10419 }
10420 }
10421
10422#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10423 /* Cleanup. */
10424 cs_close(&hDisasm);
10425#endif
10426}
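
/*
 * The AMD64 marker NOPs decoded in the routine above carry a 32-bit payload in
 * their last four bytes; the sketch below spells out the packing used when they
 * are emitted via iemNativeEmitMarker (low 15 bits = call index, bit 15 =
 * recompiled flag, high word = threaded function number).  The helper name is
 * illustrative only.
 */
#if 0 /* illustration only */
static void exampleDecodeMarkerInfo(uint32_t uInfo)
{
    uint16_t const idxCall     = uInfo & 0x7fff;          /* low 15 bits: index of the threaded call      */
    bool     const fRecompiled = RT_BOOL(uInfo & 0x8000); /* bit 15: recompiled rather than threaded call */
    uint16_t const enmFunction = RT_HIWORD(uInfo);        /* high word: IEMTHREADEDFUNCS value            */
    RT_NOREF(idxCall, fRecompiled, enmFunction);
}
#endif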
10427
10428
10429/**
10430 * Recompiles the given threaded TB into a native one.
10431 *
10432 * In case of failure the translation block will be returned as-is.
10433 *
10434 * @returns pTb.
10435 * @param pVCpu The cross context virtual CPU structure of the calling
10436 * thread.
10437 * @param pTb The threaded translation to recompile to native.
10438 */
10439DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10440{
10441 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10442
10443 /*
10444 * The first time thru, we allocate the recompiler state, the other times
10445 * we just need to reset it before using it again.
10446 */
10447 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10448 if (RT_LIKELY(pReNative))
10449 iemNativeReInit(pReNative, pTb);
10450 else
10451 {
10452 pReNative = iemNativeInit(pVCpu, pTb);
10453 AssertReturn(pReNative, pTb);
10454 }
10455
10456#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10457 /*
10458 * First do liveness analysis. This is done backwards.
10459 */
10460 {
10461 uint32_t idxCall = pTb->Thrd.cCalls;
10462 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10463 { /* likely */ }
10464 else
10465 {
10466 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10467 while (idxCall > cAlloc)
10468 cAlloc *= 2;
10469 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10470 AssertReturn(pvNew, pTb);
10471 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10472 pReNative->cLivenessEntriesAlloc = cAlloc;
10473 }
10474 AssertReturn(idxCall > 0, pTb);
10475 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10476
10477 /* The initial (final) entry. */
10478 idxCall--;
10479 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10480
10481 /* Loop backwards thru the calls and fill in the other entries. */
10482 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10483 while (idxCall > 0)
10484 {
10485 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10486 if (pfnLiveness)
10487 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10488 else
10489 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
10490 pCallEntry--;
10491 idxCall--;
10492 }
10493
10494# ifdef VBOX_WITH_STATISTICS
10495 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
10496 to 'clobbered' rather than 'input'. */
10497 /** @todo */
10498# endif
10499 }
10500#endif
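 /*
  * A minimal sketch of the backward pass above (hypothetical bitmask encoding,
  * not the real IEM liveness state): start with "nothing live" after the last
  * call and derive each earlier state from the one after it - registers a call
  * reads become inputs, registers it only writes become clobberable.
  */
#if 0 /* illustration only */
 {
     static uint32_t const s_afExampleReads[3]  = { 0x1, 0x2, 0x1 }; /* e.g. call 2 reads register 0          */
     static uint32_t const s_afExampleWrites[3] = { 0x2, 0x4, 0x2 }; /* e.g. call 2 overwrites register 1     */
     uint32_t afLiveAfter[3];
     afLiveAfter[2] = 0;                                             /* nothing is needed after the last call */
     for (uint32_t i = 2; i > 0; i--)                                /* walk backwards like the loop above    */
         afLiveAfter[i - 1] = (afLiveAfter[i] & ~s_afExampleWrites[i]) /* a write kills liveness              */
                            | s_afExampleReads[i];                     /* a read creates it                   */
     RT_NOREF(afLiveAfter);
 }
#endif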
10501
10502 /*
10503 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10504 * for aborting if an error happens.
10505 */
10506 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10507#ifdef LOG_ENABLED
10508 uint32_t const cCallsOrg = cCallsLeft;
10509#endif
10510 uint32_t off = 0;
10511 int rc = VINF_SUCCESS;
10512 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10513 {
10514 /*
10515 * Emit prolog code (fixed).
10516 */
10517 off = iemNativeEmitProlog(pReNative, off);
10518
10519 /*
10520 * Convert the calls to native code.
10521 */
10522#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10523 int32_t iGstInstr = -1;
10524#endif
10525#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10526 uint32_t cThreadedCalls = 0;
10527 uint32_t cRecompiledCalls = 0;
10528#endif
10529#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10530 uint32_t idxCurCall = 0;
10531#endif
10532 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10533 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10534 while (cCallsLeft-- > 0)
10535 {
10536 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10537#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10538 pReNative->idxCurCall = idxCurCall;
10539#endif
10540
10541 /*
10542 * Debug info, assembly markup and statistics.
10543 */
10544#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10545 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10546 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10547#endif
10548#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10549 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10550 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10551 {
10552 if (iGstInstr < (int32_t)pTb->cInstructions)
10553 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10554 else
10555 Assert(iGstInstr == pTb->cInstructions);
10556 iGstInstr = pCallEntry->idxInstr;
10557 }
10558 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10559#endif
10560#if defined(VBOX_STRICT)
10561 off = iemNativeEmitMarker(pReNative, off,
10562 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10563#endif
10564#if defined(VBOX_STRICT)
10565 iemNativeRegAssertSanity(pReNative);
10566#endif
10567#ifdef VBOX_WITH_STATISTICS
10568 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10569#endif
10570
10571 /*
10572 * Actual work.
10573 */
10574 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
10575 pfnRecom ? "(recompiled)" : "(todo)"));
10576 if (pfnRecom) /** @todo stats on this. */
10577 {
10578 off = pfnRecom(pReNative, off, pCallEntry);
10579 STAM_REL_STATS({cRecompiledCalls++;});
10580 }
10581 else
10582 {
10583 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10584 STAM_REL_STATS({cThreadedCalls++;});
10585 }
10586 Assert(off <= pReNative->cInstrBufAlloc);
10587 Assert(pReNative->cCondDepth == 0);
10588
10589#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10590 if (LogIs2Enabled())
10591 {
10592 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10593# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10594 static const char s_achState[] = "CUXI";
10595# else
10596 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
10597# endif
10598
10599 char szGpr[17];
10600 for (unsigned i = 0; i < 16; i++)
10601 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10602 szGpr[16] = '\0';
10603
10604 char szSegBase[X86_SREG_COUNT + 1];
10605 char szSegLimit[X86_SREG_COUNT + 1];
10606 char szSegAttrib[X86_SREG_COUNT + 1];
10607 char szSegSel[X86_SREG_COUNT + 1];
10608 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10609 {
10610 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10611 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10612 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10613 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10614 }
10615 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10616 = szSegSel[X86_SREG_COUNT] = '\0';
10617
10618 char szEFlags[8];
10619 for (unsigned i = 0; i < 7; i++)
10620 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10621 szEFlags[7] = '\0';
10622
10623 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10624 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10625 }
10626#endif
10627
10628 /*
10629 * Advance.
10630 */
10631 pCallEntry++;
10632#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10633 idxCurCall++;
10634#endif
10635 }
10636
10637 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10638 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10639 if (!cThreadedCalls)
10640 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10641
10642 /*
10643 * Emit the epilog code.
10644 */
10645 uint32_t idxReturnLabel;
10646 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10647
10648 /*
10649 * Generate special jump labels.
10650 */
10651 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10652 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10653 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10654 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10655
10656 /*
10657 * Generate simple TB tail labels that just call a helper with a pVCpu
10658 * arg and either return or longjmp/throw a non-zero status.
10659 *
10660 * The array entries must be ordered by enmLabel value so we can index
10661 * using fTailLabels bit numbers.
10662 */
10663 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10664 static struct
10665 {
10666 IEMNATIVELABELTYPE enmLabel;
10667 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10668 } const g_aSimpleTailLabels[] =
10669 {
10670 { kIemNativeLabelType_Invalid, NULL },
10671 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10672 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10673 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10674 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10675 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10676 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10677 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10678 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10679 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10680 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10681 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10682 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10683 };
10684 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10685 AssertCompile(kIemNativeLabelType_Invalid == 0);
10686 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
10687 if (fTailLabels)
10688 {
10689 do
10690 {
10691 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10692 fTailLabels &= ~RT_BIT_64(enmLabel);
10693 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10694
10695 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10696 Assert(idxLabel != UINT32_MAX);
10697 if (idxLabel != UINT32_MAX)
10698 {
10699 iemNativeLabelDefine(pReNative, idxLabel, off);
10700
10701 /* int pfnCallback(PVMCPUCC pVCpu) */
10702 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10703 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10704
10705 /* jump back to the return sequence. */
10706 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10707 }
10708
10709 } while (fTailLabels);
10710 }
10711 }
10712 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10713 {
10714 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10715 return pTb;
10716 }
10717 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10718 Assert(off <= pReNative->cInstrBufAlloc);
10719
10720 /*
10721 * Make sure all labels have been defined.
10722 */
10723 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10724#ifdef VBOX_STRICT
10725 uint32_t const cLabels = pReNative->cLabels;
10726 for (uint32_t i = 0; i < cLabels; i++)
10727 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10728#endif
10729
10730 /*
10731 * Allocate executable memory, copy over the code we've generated.
10732 */
10733 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10734 if (pTbAllocator->pDelayedFreeHead)
10735 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10736
10737 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
10738 AssertReturn(paFinalInstrBuf, pTb);
10739 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10740
10741 /*
10742 * Apply fixups.
10743 */
10744 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10745 uint32_t const cFixups = pReNative->cFixups;
10746 for (uint32_t i = 0; i < cFixups; i++)
10747 {
10748 Assert(paFixups[i].off < off);
10749 Assert(paFixups[i].idxLabel < cLabels);
10750 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10751 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10752 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10753 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10754 switch (paFixups[i].enmType)
10755 {
10756#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10757 case kIemNativeFixupType_Rel32:
10758 Assert(paFixups[i].off + 4 <= off);
10759 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10760 continue;
10761
10762#elif defined(RT_ARCH_ARM64)
10763 case kIemNativeFixupType_RelImm26At0:
10764 {
10765 Assert(paFixups[i].off < off);
10766 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10767 Assert(offDisp >= -262144 && offDisp < 262144);
10768 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10769 continue;
10770 }
10771
10772 case kIemNativeFixupType_RelImm19At5:
10773 {
10774 Assert(paFixups[i].off < off);
10775 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10776 Assert(offDisp >= -262144 && offDisp < 262144);
10777 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10778 continue;
10779 }
10780
10781 case kIemNativeFixupType_RelImm14At5:
10782 {
10783 Assert(paFixups[i].off < off);
10784 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10785 Assert(offDisp >= -8192 && offDisp < 8192);
10786 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10787 continue;
10788 }
10789
10790#endif
10791 case kIemNativeFixupType_Invalid:
10792 case kIemNativeFixupType_End:
10793 break;
10794 }
10795 AssertFailed();
10796 }
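 /*
  * For illustration, what a single ARM64-style 26-bit branch fixup from the
  * switch above (cf. kIemNativeFixupType_RelImm26At0) boils down to, using
  * made-up values: the signed distance between branch and label, counted in
  * 32-bit instruction units, is masked into the low 26 bits of the already
  * emitted instruction word while the opcode bits are preserved.
  */
#if 0 /* illustration only */
 {
     uint32_t      uExampleInstr = UINT32_C(0x14000000);             /* a "B +0" placeholder emitted earlier */
     int32_t const offDisp       = 8 - 2;                            /* label at instruction 8, branch at 2  */
     uExampleInstr = (uExampleInstr & UINT32_C(0xfc000000))          /* keep the opcode bits                 */
                   | ((uint32_t)offDisp & UINT32_C(0x03ffffff));     /* displacement goes into imm26         */
     RT_NOREF(uExampleInstr);
 }
#endif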
10797
10798 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
10799 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10800
10801 /*
10802 * Convert the translation block.
10803 */
10804 RTMemFree(pTb->Thrd.paCalls);
10805 pTb->Native.paInstructions = paFinalInstrBuf;
10806 pTb->Native.cInstructions = off;
10807 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10808#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10809 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10810 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10811#endif
10812
10813 Assert(pTbAllocator->cThreadedTbs > 0);
10814 pTbAllocator->cThreadedTbs -= 1;
10815 pTbAllocator->cNativeTbs += 1;
10816 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10817
10818#ifdef LOG_ENABLED
10819 /*
10820 * Disassemble to the log if enabled.
10821 */
10822 if (LogIs3Enabled())
10823 {
10824 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10825 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
10826# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10827 RTLogFlush(NULL);
10828# endif
10829 }
10830#endif
10831 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10832
10833 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10834 return pTb;
10835}
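
/*
 * A small sketch of the bit-set walk used for the tail labels in the function
 * above (generic callback, illustrative name): bit 0 (the invalid label) is
 * masked out up front, then the lowest set bit is taken, handled and cleared
 * until the mask is empty.
 */
#if 0 /* illustration only */
static void exampleForEachSetBit(uint64_t fMask, void (*pfnHandle)(unsigned iBit))
{
    fMask &= ~(uint64_t)1;                                  /* bit 0 is the invalid/unused label, skip it */
    while (fMask)
    {
        unsigned const iBit = ASMBitFirstSetU64(fMask) - 1; /* ASMBitFirstSetU64 returns a 1-based index  */
        fMask &= ~RT_BIT_64(iBit);                          /* clear it so the loop terminates            */
        pfnHandle(iBit);                                    /* dispatch, like defining + emitting a label */
    }
}
#endif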
10836