VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103953

Last change on this file since 103953 was 103953, checked in by vboxsync, 11 months ago

VMM/IEM: Implement native emitters for IEM_MC_STORE_MEM_U256_ALIGN_AVX()/IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(), bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 427.4 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103953 2024-03-20 12:29:21Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation restricts page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
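/* Note: the unit size and shift must agree (128 == 1 << 7).  For example, a
   300 byte request is rounded up to 3 units, i.e. 384 bytes, by
   iemExecMemAllocatorAllocInChunk below. */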
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk unwind info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity, they are allocated as one continuous block
339 * rather than one per chunk. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
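/**
 * Scans the chunk allocation bitmap for a run of @a cReqUnits consecutive
 * clear (free) bits, starting at unit @a idxFirst and covering @a cToScan
 * bits.  On success the units are marked as allocated and the corresponding
 * pointer into the chunk is returned, otherwise NULL.
 */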
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits of consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
461
462
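/**
 * Worker for iemExecMemAllocatorAlloc that tries to satisfy a request of
 * @a cbReq bytes from the chunk given by @a idxChunk, returning NULL if the
 * chunk does not have enough free space.
 */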
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * For the alternative allocator we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated. */
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
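/* For reference: the above produces standard signed LEB128, e.g. 300 encodes
   as 0xAC 0x02 and -8 as the single byte 0x78. */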
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
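/* For reference: iemDwarfPutUleb128(Ptr, 624485) emits 0xE5 0x8E 0x26, the
   ULEB128 example from the DWARF specification. */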
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
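/* Note: with the data alignment factor of -8 set up in the CIE below, an
   offset operand of 2 places the register save slot at CFA - 16. */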
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
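 /* Example: with the 32 byte block header, a 100 byte request is adjusted
    to RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes, so the next block's
    header ends exactly on a 64 byte boundary and its user area is again
    64 byte aligned. */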
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
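 /* For example: cbMax = 1 GiB yields cbChunk = 64 MiB and cMaxChunks = 16,
    while cbMax = 64 MiB yields cbChunk = 16 MiB and cMaxChunks = 4. */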
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505    cbNeeded = offBitmaps + cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511    cbNeeded = offEhFrames + sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
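    /* Layout of the single RTMemAllocZ block allocated below: the IEMEXECMEMALLOCATOR
       header including its aChunks array, then (alt sub-allocator only) the per-chunk
       allocation bitmaps at offBitmaps, and finally (ring-3, non-Windows only) the
       per-chunk EH frame buffers at offEhFrames. */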
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
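    /* Note: VINF_IEM_REEXEC_BREAK is the internal 'stop executing this TB' status
       (see the helpers further down that return it) and not an error, so it is
       translated to VINF_SUCCESS before the common status code fiddling. */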
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
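    /* CR0.EM set or CR4.OSFXSR clear means SSE instructions are unavailable, so raise \#UD;
       otherwise this is the CR0.TS case of IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT, so raise \#NM. */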
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
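    /* Missing YMM/SSE state in XCR0 or CR4.OSXSAVE clear means AVX is unavailable, so raise \#UD;
       otherwise this is the CR0.TS case of IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT, so raise \#NM. */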
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#NM.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDeviceNotAvailableJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when it wants to raise a \#GP(0).
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1657{
1658 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1659#ifndef _MSC_VER
1660 return VINF_IEM_RAISED_XCPT; /* not reached */
1661#endif
1662}
1663
1664
1665/**
1666 * Used by TB code when it wants to raise a \#MF.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1669{
1670 iemRaiseMathFaultJmp(pVCpu);
1671#ifndef _MSC_VER
1672 return VINF_IEM_RAISED_XCPT; /* not reached */
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code when it wants to raise a \#XF.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1681{
1682 iemRaiseSimdFpExceptionJmp(pVCpu);
1683#ifndef _MSC_VER
1684 return VINF_IEM_RAISED_XCPT; /* not reached */
1685#endif
1686}
1687
1688
1689/**
1690 * Used by TB code when detecting opcode changes.
1691 * @see iemThreadeFuncWorkerObsoleteTb
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1694{
1695    /* We set fSafeToFree to false because we're being called in the context
1696       of a TB callback function, which for native TBs means we cannot release
1697       the executable memory until we've returned our way back to iemTbExec, as
1698       that return path goes via the native code generated for the TB. */
1699 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1700 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1701 return VINF_IEM_REEXEC_BREAK;
1702}
1703
1704
1705/**
1706 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1707 */
1708IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1709{
1710 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1711 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1712 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1713 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1714 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1715 return VINF_IEM_REEXEC_BREAK;
1716}
1717
1718
1719/**
1720 * Used by TB code when we missed a PC check after a branch.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1723{
1724 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1725 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1726 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1727 pVCpu->iem.s.pbInstrBuf));
1728 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1729 return VINF_IEM_REEXEC_BREAK;
1730}
1731
1732
1733
1734/*********************************************************************************************************************************
1735* Helpers: Segmented memory fetches and stores. *
1736*********************************************************************************************************************************/
1737
1738/**
1739 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
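    /* When IEMNATIVE_WITH_TLB_LOOKUP_FETCH is defined the TB code does the TLB lookup
       inline and only calls this helper on the slow path, hence the *SafeJmp* worker;
       otherwise the helper performs the whole access via the regular *Jmp* worker.
       The same pattern applies to the fetch/store/map helpers below. */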
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1753 * to 16 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
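    /* The cast chain sign-extends the fetched byte to 16 bits and then zero-extends
       the 16-bit result to 64 bits, so bits 16 thru 63 of the return value are zero. */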
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
1764
1765/**
1766 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1767 * to 32 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778/**
1779 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
1790
1791
1792/**
1793 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1807 * to 32 bits.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1821 * to 64 bits.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1826 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1827#else
1828 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1839 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1840#else
1841 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1848 * to 64 bits.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1853 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1854#else
1855 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1866 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1867#else
1868 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1869#endif
1870}
1871
1872
1873#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1874/**
1875 * Used by TB code to load 128-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1880 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1881#else
1882 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1883#endif
1884}
1885
1886
1887/**
1888 * Used by TB code to load 128-bit data w/ segmentation and SSE alignment checking.
1889 */
1890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1891{
1892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1893 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1894#else
1895 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1896#endif
1897}
1898
1899
1900/**
1901 * Used by TB code to load 128-bit data w/ segmentation, skipping the alignment check.
1902 */
1903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1904{
1905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1906 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1907#else
1908 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1909#endif
1910}
1911
1912
1913/**
1914 * Used by TB code to load 256-bit data w/ segmentation, skipping the alignment check.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1917{
1918#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1919 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1920#else
1921 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1922#endif
1923}
1924
1925
1926/**
1927 * Used by TB code to load 256-bit data w/ segmentation and AVX alignment checking.
1928 */
1929IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1930{
1931#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1932 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1933#else
1934 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1935#endif
1936}
1937#endif
1938
1939
1940/**
1941 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1944{
1945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1946 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1947#else
1948 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1949#endif
1950}
1951
1952
1953/**
1954 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1955 */
1956IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1957{
1958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1959 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1960#else
1961 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1962#endif
1963}
1964
1965
1966/**
1967 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1968 */
1969IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1970{
1971#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1972 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1973#else
1974 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1975#endif
1976}
1977
1978
1979/**
1980 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1981 */
1982IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1983{
1984#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1985 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1986#else
1987 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1988#endif
1989}
1990
1991
1992#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1993/**
1994 * Used by TB code to store unsigned 128-bit data w/ segmentation and SSE alignment checking.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1999 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2000#else
2001 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to store unsigned 128-bit data w/ segmentation, skipping the alignment check.
2008 */
2009IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
2010{
2011#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2012 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2013#else
2014 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2015#endif
2016}
2017
2018
2019/**
2020 * Used by TB code to store unsigned 256-bit data w/ segmentation, skipping the alignment check.
2021 */
2022IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2023{
2024#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2025 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2026#else
2027 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2028#endif
2029}
2030
2031
2032/**
2033 * Used by TB code to store unsigned 256-bit data w/ segmentation and AVX alignment checking.
2034 */
2035IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2036{
2037#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2038 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2039#else
2040    iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2041#endif
2042}
2043#endif
2044
2045
2046
2047/**
2048 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
2049 */
2050IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2051{
2052#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2053 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2054#else
2055 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2056#endif
2057}
2058
2059
2060/**
2061 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
2062 */
2063IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2064{
2065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2066 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2067#else
2068 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2069#endif
2070}
2071
2072
2073/**
2074 * Used by TB code to store a 32-bit selector value onto a generic stack.
2075 *
2076 * Intel CPUs don't write the whole dword, hence the special function.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2081 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2082#else
2083 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2092{
2093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2094 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2095#else
2096 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2097#endif
2098}
2099
2100
2101/**
2102 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2103 */
2104IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2105{
2106#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2107 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2108#else
2109 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2110#endif
2111}
2112
2113
2114/**
2115 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2116 */
2117IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2118{
2119#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2120 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2121#else
2122 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2123#endif
2124}
2125
2126
2127/**
2128 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2129 */
2130IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2131{
2132#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2133 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2134#else
2135 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2136#endif
2137}
2138
2139
2140
2141/*********************************************************************************************************************************
2142* Helpers: Flat memory fetches and stores. *
2143*********************************************************************************************************************************/
2144
2145/**
2146 * Used by TB code to load unsigned 8-bit data w/ flat address.
2147 * @note Zero extending the value to 64-bit to simplify assembly.
2148 */
2149IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2150{
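    /* Passing UINT8_MAX as the segment register index tells the common worker that
       this is a flat address with no segment base or limit to apply. */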
2151#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2152 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2153#else
2154 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2155#endif
2156}
2157
2158
2159/**
2160 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2161 * to 16 bits.
2162 * @note Zero extending the value to 64-bit to simplify assembly.
2163 */
2164IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2165{
2166#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2167 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2168#else
2169 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2170#endif
2171}
2172
2173
2174/**
2175 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2176 * to 32 bits.
2177 * @note Zero extending the value to 64-bit to simplify assembly.
2178 */
2179IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2180{
2181#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2182 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2183#else
2184 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2185#endif
2186}
2187
2188
2189/**
2190 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2191 * to 64 bits.
2192 */
2193IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2194{
2195#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2196 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2197#else
2198 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2199#endif
2200}
2201
2202
2203/**
2204 * Used by TB code to load unsigned 16-bit data w/ flat address.
2205 * @note Zero extending the value to 64-bit to simplify assembly.
2206 */
2207IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2208{
2209#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2210 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2211#else
2212 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2213#endif
2214}
2215
2216
2217/**
2218 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2219 * to 32 bits.
2220 * @note Zero extending the value to 64-bit to simplify assembly.
2221 */
2222IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2223{
2224#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2225 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2226#else
2227 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2228#endif
2229}
2230
2231
2232/**
2233 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2234 * to 64 bits.
2235 * @note Zero extending the value to 64-bit to simplify assembly.
2236 */
2237IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2238{
2239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2240 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2241#else
2242 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2243#endif
2244}
2245
2246
2247/**
2248 * Used by TB code to load unsigned 32-bit data w/ flat address.
2249 * @note Zero extending the value to 64-bit to simplify assembly.
2250 */
2251IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2252{
2253#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2254 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2255#else
2256 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2257#endif
2258}
2259
2260
2261/**
2262 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2263 * to 64 bits.
2264 * @note Zero extending the value to 64-bit to simplify assembly.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2269 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2270#else
2271 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2272#endif
2273}
2274
2275
2276/**
2277 * Used by TB code to load unsigned 64-bit data w/ flat address.
2278 */
2279IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2280{
2281#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2282 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2283#else
2284 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2285#endif
2286}
2287
2288
2289#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2290/**
2291 * Used by TB code to load unsigned 128-bit data w/ flat address.
2292 */
2293IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2294{
2295#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2296 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2297#else
2298 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2299#endif
2300}
2301
2302
2303/**
2304 * Used by TB code to load unsigned 128-bit data w/ flat address and SSE alignment checking.
2305 */
2306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2307{
2308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2309 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2310#else
2311 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2312#endif
2313}
2314
2315
2316/**
2317 * Used by TB code to load unsigned 128-bit data w/ flat address, skipping the alignment check.
2318 */
2319IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2320{
2321#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2322 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2323#else
2324 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2325#endif
2326}
2327
2328
2329/**
2330 * Used by TB code to load unsigned 256-bit data w/ flat address, skipping the alignment check.
2331 */
2332IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2333{
2334#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2335 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2336#else
2337 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2338#endif
2339}
2340
2341
2342/**
2343 * Used by TB code to load unsigned 256-bit data w/ flat address and AVX alignment checking.
2344 */
2345IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2346{
2347#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2348 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2349#else
2350 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2351#endif
2352}
2353#endif
2354
2355
2356/**
2357 * Used by TB code to store unsigned 8-bit data w/ flat address.
2358 */
2359IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2360{
2361#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2362 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2363#else
2364 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2365#endif
2366}
2367
2368
2369/**
2370 * Used by TB code to store unsigned 16-bit data w/ flat address.
2371 */
2372IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2373{
2374#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2375 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2376#else
2377 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2378#endif
2379}
2380
2381
2382/**
2383 * Used by TB code to store unsigned 32-bit data w/ flat address.
2384 */
2385IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2386{
2387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2388 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2389#else
2390 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2391#endif
2392}
2393
2394
2395/**
2396 * Used by TB code to store unsigned 64-bit data w/ flat address.
2397 */
2398IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2399{
2400#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2401 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2402#else
2403 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2404#endif
2405}
2406
2407
2408#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2409/**
2410 * Used by TB code to store unsigned 128-bit data w/ flat address and SSE alignment checking.
2411 */
2412IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2413{
2414#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2415 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2416#else
2417 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2418#endif
2419}
2420
2421
2422/**
2423 * Used by TB code to store unsigned 128-bit data w/ flat address, skipping the alignment check.
2424 */
2425IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2426{
2427#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2428 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2429#else
2430 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2431#endif
2432}
2433
2434
2435/**
2436 * Used by TB code to store unsigned 256-bit data w/ flat address, skipping the alignment check.
2437 */
2438IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2439{
2440#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2441 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2442#else
2443 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
2444#endif
2445}
2446
2447
2448/**
2449 * Used by TB code to store unsigned 256-bit data w/ flat address and AVX alignment checking.
2450 */
2451IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2452{
2453#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2454 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2455#else
2456 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
2457#endif
2458}
2459#endif
2460
2461
2462
2463/**
2464 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2465 */
2466IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2467{
2468#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2469 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2470#else
2471 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2472#endif
2473}
2474
2475
2476/**
2477 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2478 */
2479IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2480{
2481#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2482 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2483#else
2484 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2485#endif
2486}
2487
2488
2489/**
2490 * Used by TB code to store a segment selector value onto a flat stack.
2491 *
2492 * Intel CPUs don't write the whole dword, hence the special function.
2493 */
2494IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2495{
2496#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2497 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2498#else
2499 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2500#endif
2501}
2502
2503
2504/**
2505 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2506 */
2507IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2508{
2509#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2510 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2511#else
2512 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2513#endif
2514}
2515
2516
2517/**
2518 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2519 */
2520IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2521{
2522#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2523 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2524#else
2525 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2526#endif
2527}
2528
2529
2530/**
2531 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2532 */
2533IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2534{
2535#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2536 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2537#else
2538 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2539#endif
2540}
2541
2542
2543/**
2544 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2545 */
2546IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2547{
2548#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2549 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2550#else
2551 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2552#endif
2553}
2554
2555
2556
2557/*********************************************************************************************************************************
2558* Helpers: Segmented memory mapping. *
2559*********************************************************************************************************************************/
2560
2561/**
2562 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2563 * segmentation.
2564 */
2565IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2566 RTGCPTR GCPtrMem, uint8_t iSegReg))
2567{
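    /* The byte written to *pbUnmapInfo is what the iemNativeHlpMemCommitAndUnmap*
       helpers at the end of this file take to release the mapping again. */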
2568#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2569 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2570#else
2571 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2572#endif
2573}
2574
2575
2576/**
2577 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2578 */
2579IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2580 RTGCPTR GCPtrMem, uint8_t iSegReg))
2581{
2582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2583 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2584#else
2585 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2586#endif
2587}
2588
2589
2590/**
2591 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2592 */
2593IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2594 RTGCPTR GCPtrMem, uint8_t iSegReg))
2595{
2596#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2597 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2598#else
2599 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2600#endif
2601}
2602
2603
2604/**
2605 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2606 */
2607IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2608 RTGCPTR GCPtrMem, uint8_t iSegReg))
2609{
2610#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2611 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2612#else
2613 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2614#endif
2615}
2616
2617
2618/**
2619 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2620 * segmentation.
2621 */
2622IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2623 RTGCPTR GCPtrMem, uint8_t iSegReg))
2624{
2625#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2626 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2627#else
2628 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2629#endif
2630}
2631
2632
2633/**
2634 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2635 */
2636IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2637 RTGCPTR GCPtrMem, uint8_t iSegReg))
2638{
2639#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2640 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2641#else
2642 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2643#endif
2644}
2645
2646
2647/**
2648 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2649 */
2650IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2651 RTGCPTR GCPtrMem, uint8_t iSegReg))
2652{
2653#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2654 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2655#else
2656 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2657#endif
2658}
2659
2660
2661/**
2662 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2663 */
2664IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2665 RTGCPTR GCPtrMem, uint8_t iSegReg))
2666{
2667#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2668 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2669#else
2670 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2671#endif
2672}
2673
2674
2675/**
2676 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2677 * segmentation.
2678 */
2679IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2680 RTGCPTR GCPtrMem, uint8_t iSegReg))
2681{
2682#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2683 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2684#else
2685 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2686#endif
2687}
2688
2689
2690/**
2691 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2692 */
2693IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2694 RTGCPTR GCPtrMem, uint8_t iSegReg))
2695{
2696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2697 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2698#else
2699 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2700#endif
2701}
2702
2703
2704/**
2705 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2706 */
2707IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2708 RTGCPTR GCPtrMem, uint8_t iSegReg))
2709{
2710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2711 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2712#else
2713 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2714#endif
2715}
2716
2717
2718/**
2719 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2722 RTGCPTR GCPtrMem, uint8_t iSegReg))
2723{
2724#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2725 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2726#else
2727 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2728#endif
2729}
2730
2731
2732/**
2733 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2734 * segmentation.
2735 */
2736IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2737 RTGCPTR GCPtrMem, uint8_t iSegReg))
2738{
2739#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2740 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2741#else
2742 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2743#endif
2744}
2745
2746
2747/**
2748 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2749 */
2750IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2751 RTGCPTR GCPtrMem, uint8_t iSegReg))
2752{
2753#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2754 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2755#else
2756 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2757#endif
2758}
2759
2760
2761/**
2762 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2763 */
2764IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2765 RTGCPTR GCPtrMem, uint8_t iSegReg))
2766{
2767#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2768 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2769#else
2770 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2771#endif
2772}
2773
2774
2775/**
2776 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2777 */
2778IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2779 RTGCPTR GCPtrMem, uint8_t iSegReg))
2780{
2781#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2782 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2783#else
2784 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2785#endif
2786}
2787
2788
2789/**
2790 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2791 */
2792IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2793 RTGCPTR GCPtrMem, uint8_t iSegReg))
2794{
2795#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2796 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2797#else
2798 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2799#endif
2800}
2801
2802
2803/**
2804 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2805 */
2806IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2807 RTGCPTR GCPtrMem, uint8_t iSegReg))
2808{
2809#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2810 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2811#else
2812 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2813#endif
2814}
2815
2816
2817/**
2818 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2819 * segmentation.
2820 */
2821IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2822 RTGCPTR GCPtrMem, uint8_t iSegReg))
2823{
2824#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2825 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2826#else
2827 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2828#endif
2829}
2830
2831
2832/**
2833 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2834 */
2835IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2836 RTGCPTR GCPtrMem, uint8_t iSegReg))
2837{
2838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2839 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2840#else
2841 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2842#endif
2843}
2844
2845
2846/**
2847 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2848 */
2849IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2850 RTGCPTR GCPtrMem, uint8_t iSegReg))
2851{
2852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2853 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2854#else
2855 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2856#endif
2857}
2858
2859
2860/**
2861 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2862 */
2863IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2864 RTGCPTR GCPtrMem, uint8_t iSegReg))
2865{
2866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2867 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2868#else
2869 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2870#endif
2871}
2872
2873
2874/*********************************************************************************************************************************
2875* Helpers: Flat memory mapping. *
2876*********************************************************************************************************************************/
2877
2878/**
2879 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2880 * address.
2881 */
2882IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2883{
2884#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2885 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2886#else
2887 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2888#endif
2889}
2890
2891
2892/**
2893 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2894 */
2895IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2896{
2897#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2898 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2899#else
2900 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2901#endif
2902}
2903
2904
2905/**
2906 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2907 */
2908IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2909{
2910#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2911 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2912#else
2913 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2914#endif
2915}
2916
2917
2918/**
2919 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2920 */
2921IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2922{
2923#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2924 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2925#else
2926 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2927#endif
2928}
2929
2930
2931/**
2932 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2933 * address.
2934 */
2935IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2936{
2937#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2938 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2939#else
2940 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2941#endif
2942}
2943
2944
2945/**
2946 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2947 */
2948IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2949{
2950#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2951 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2952#else
2953 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2954#endif
2955}
2956
2957
2958/**
2959 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2960 */
2961IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2962{
2963#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2964 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2965#else
2966 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2967#endif
2968}
2969
2970
2971/**
2972 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2973 */
2974IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2975{
2976#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2977 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2978#else
2979 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2980#endif
2981}
2982
2983
2984/**
2985 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2986 * address.
2987 */
2988IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2989{
2990#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2991 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2992#else
2993 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2994#endif
2995}
2996
2997
2998/**
2999 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
3000 */
3001IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3002{
3003#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3004 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3005#else
3006 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3007#endif
3008}
3009
3010
3011/**
3012 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
3013 */
3014IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3015{
3016#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3017 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3018#else
3019 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3020#endif
3021}
3022
3023
3024/**
3025 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
3026 */
3027IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3028{
3029#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3030 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3031#else
3032 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3033#endif
3034}
3035
3036
3037/**
3038 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
3039 * address.
3040 */
3041IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3042{
3043#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3044 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3045#else
3046 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3047#endif
3048}
3049
3050
3051/**
3052 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
3053 */
3054IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3055{
3056#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3057 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3058#else
3059 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3060#endif
3061}
3062
3063
3064/**
3065 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
3066 */
3067IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3068{
3069#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3070 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3071#else
3072 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3073#endif
3074}
3075
3076
3077/**
3078 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3079 */
3080IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3081{
3082#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3083 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3084#else
3085 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3086#endif
3087}
3088
3089
3090/**
3091 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3092 */
3093IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3094{
3095#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3096 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3097#else
3098 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3099#endif
3100}
3101
3102
3103/**
3104 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3105 */
3106IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3107{
3108#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3109 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3110#else
3111 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3112#endif
3113}
3114
3115
3116/**
3117 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3118 * address.
3119 */
3120IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3121{
3122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3123 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3124#else
3125 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3126#endif
3127}
3128
3129
3130/**
3131 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3132 */
3133IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3134{
3135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3136 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3137#else
3138 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3139#endif
3140}
3141
3142
3143/**
3144 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3145 */
3146IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3147{
3148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3149 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3150#else
3151 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3152#endif
3153}
3154
3155
3156/**
3157 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3158 */
3159IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3160{
3161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3162 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3163#else
3164 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3165#endif
3166}
3167
3168
3169/*********************************************************************************************************************************
3170* Helpers: Commit, rollback & unmap *
3171*********************************************************************************************************************************/
3172
3173/**
 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3175 */
3176IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3177{
3178 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3179}
3180
3181
3182/**
3183 * Used by TB code to commit and unmap a read-write memory mapping.
3184 */
3185IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3186{
3187 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3188}
3189
3190
3191/**
3192 * Used by TB code to commit and unmap a write-only memory mapping.
3193 */
3194IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3195{
3196 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3197}
3198
3199
3200/**
3201 * Used by TB code to commit and unmap a read-only memory mapping.
3202 */
3203IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3204{
3205 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3206}
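
/*
 * Note: the flat map helpers above and the commit & unmap helpers in this section
 * are meant to be used as matching pairs by the generated TB code.  The sketch
 * below is an illustration only (kept out of the build); it assumes the read-only
 * U32 helpers and uses invented local/function names.
 */
#if 0 /* illustrative usage sketch, not compiled */
static uint32_t iemNativeSketchReadFlatU32(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint8_t         bUnmapInfo = 0;
    uint32_t const *pu32Src    = iemNativeHlpMemFlatMapDataU32Ro(pVCpu, &bUnmapInfo, GCPtrMem); /* map */
    uint32_t const  uValue     = *pu32Src;                                                      /* access */
    iemNativeHlpMemCommitAndUnmapRo(pVCpu, bUnmapInfo);                                         /* commit & unmap */
    return uValue;
}
#endif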
3207
3208
3209/**
3210 * Reinitializes the native recompiler state.
3211 *
3212 * Called before starting a new recompile job.
3213 */
3214static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3215{
3216 pReNative->cLabels = 0;
3217 pReNative->bmLabelTypes = 0;
3218 pReNative->cFixups = 0;
3219#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3220 pReNative->pDbgInfo->cEntries = 0;
3221#endif
3222 pReNative->pTbOrg = pTb;
3223 pReNative->cCondDepth = 0;
3224 pReNative->uCondSeqNo = 0;
3225 pReNative->uCheckIrqSeqNo = 0;
3226 pReNative->uTlbSeqNo = 0;
3227
3228#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3229 pReNative->Core.offPc = 0;
3230 pReNative->Core.cInstrPcUpdateSkipped = 0;
3231#endif
3232#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3233 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3234#endif
3235 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3236#if IEMNATIVE_HST_GREG_COUNT < 32
3237 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3238#endif
3239 ;
3240 pReNative->Core.bmHstRegsWithGstShadow = 0;
3241 pReNative->Core.bmGstRegShadows = 0;
3242 pReNative->Core.bmVars = 0;
3243 pReNative->Core.bmStack = 0;
3244 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3245 pReNative->Core.u64ArgVars = UINT64_MAX;
3246
3247 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 16);
3248 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3249 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3250 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3251 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3252 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3253 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3254 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3255 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3256 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3257 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3258 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3259 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3260 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3261 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3262 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3263 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3264
3265 /* Full host register reinit: */
3266 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3267 {
3268 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3269 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3270 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3271 }
3272
3273 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3274 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3275#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3276 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3277#endif
#ifdef IEMNATIVE_REG_FIXED_TMP0
3279 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3280#endif
3281#ifdef IEMNATIVE_REG_FIXED_TMP1
3282 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3283#endif
3284#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3285 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3286#endif
3287 );
3288 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3289 {
3290 fRegs &= ~RT_BIT_32(idxReg);
        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3292 }
3293
3294 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3295#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3296 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3297#endif
3298#ifdef IEMNATIVE_REG_FIXED_TMP0
3299 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3300#endif
3301#ifdef IEMNATIVE_REG_FIXED_TMP1
3302 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3303#endif
3304#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3305 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3306#endif
3307
3308#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3309# ifdef RT_ARCH_ARM64
    /*
     * ARM64 only has 32 128-bit registers.  In order to support emulating 256-bit
     * registers we statically pair two real registers into one virtual register for
     * now, leaving us with only 16 256-bit registers.  We always pair v0 with v1,
     * v2 with v3, etc., so we mark the higher (odd numbered) register of each pair
     * as fixed here during init (bits 1, 3, 5, ..., 31, i.e. 0xaaaaaaaa); the
     * register allocator assumes it is always free when the lower one is picked.
     */
3316 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3317# else
3318 uint32_t const fFixedAdditional = 0;
3319# endif
3320
3321 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3322 | fFixedAdditional
3323# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3324 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3325# endif
3326 ;
3327 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3328 pReNative->Core.bmGstSimdRegShadows = 0;
3329 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3330 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3331
3332 /* Full host register reinit: */
3333 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3334 {
3335 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3336 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3337 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3338 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3339 }
3340
3341 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3342 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3343 {
3344 fRegs &= ~RT_BIT_32(idxReg);
3345 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3346 }
3347
3348#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3349 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3350#endif
3351
3352#endif
3353
3354 return pReNative;
3355}
3356
3357
3358/**
3359 * Allocates and initializes the native recompiler state.
3360 *
3361 * This is called the first time an EMT wants to recompile something.
3362 *
3363 * @returns Pointer to the new recompiler state.
3364 * @param pVCpu The cross context virtual CPU structure of the calling
3365 * thread.
3366 * @param pTb The TB that's about to be recompiled.
3367 * @thread EMT(pVCpu)
3368 */
3369static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3370{
3371 VMCPU_ASSERT_EMT(pVCpu);
3372
3373 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3374 AssertReturn(pReNative, NULL);
3375
3376 /*
     * Try to allocate all the buffers and stuff we need.
3378 */
3379 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3380 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3381 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3382#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3383 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3384#endif
3385 if (RT_LIKELY( pReNative->pInstrBuf
3386 && pReNative->paLabels
3387 && pReNative->paFixups)
3388#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3389 && pReNative->pDbgInfo
3390#endif
3391 )
3392 {
3393 /*
3394 * Set the buffer & array sizes on success.
3395 */
3396 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3397 pReNative->cLabelsAlloc = _8K;
3398 pReNative->cFixupsAlloc = _16K;
3399#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3400 pReNative->cDbgInfoAlloc = _16K;
3401#endif
3402
3403 /* Other constant stuff: */
3404 pReNative->pVCpu = pVCpu;
3405
3406 /*
3407 * Done, just need to save it and reinit it.
3408 */
3409 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3410 return iemNativeReInit(pReNative, pTb);
3411 }
3412
3413 /*
3414 * Failed. Cleanup and return.
3415 */
3416 AssertFailed();
3417 RTMemFree(pReNative->pInstrBuf);
3418 RTMemFree(pReNative->paLabels);
3419 RTMemFree(pReNative->paFixups);
3420#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3421 RTMemFree(pReNative->pDbgInfo);
3422#endif
3423 RTMemFree(pReNative);
3424 return NULL;
3425}
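
/*
 * Illustrative sketch (kept out of the build) of how the init/reinit pair above is
 * expected to be used when a recompile job starts: reuse the state cached in
 * pVCpu->iem.s.pNativeRecompilerStateR3 when present, otherwise create it.  The
 * function name is invented; the actual call site is not shown here.
 */
#if 0 /* illustrative usage sketch, not compiled */
static PIEMRECOMPILERSTATE iemNativeSketchGetState(PVMCPUCC pVCpu, PCIEMTB pTb)
{
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (RT_LIKELY(pReNative))
        return iemNativeReInit(pReNative, pTb); /* reuse the cached state for subsequent TBs */
    return iemNativeInit(pVCpu, pTb);           /* first recompilation on this EMT; may return NULL */
}
#endif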
3426
3427
3428/**
3429 * Creates a label
3430 *
3431 * If the label does not yet have a defined position,
3432 * call iemNativeLabelDefine() later to set it.
3433 *
3434 * @returns Label ID. Throws VBox status code on failure, so no need to check
3435 * the return value.
3436 * @param pReNative The native recompile state.
3437 * @param enmType The label type.
3438 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3439 * label is not yet defined (default).
 * @param   uData       Data associated with the label.  Only applicable to
 *                      certain types of labels.  Default is zero.
3442 */
3443DECL_HIDDEN_THROW(uint32_t)
3444iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3445 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3446{
3447 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3448
3449 /*
3450 * Locate existing label definition.
3451 *
3452 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3453 * and uData is zero.
3454 */
3455 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3456 uint32_t const cLabels = pReNative->cLabels;
3457 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3458#ifndef VBOX_STRICT
3459 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3460 && offWhere == UINT32_MAX
3461 && uData == 0
3462#endif
3463 )
3464 {
3465#ifndef VBOX_STRICT
3466 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3467 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3468 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3469 if (idxLabel < pReNative->cLabels)
3470 return idxLabel;
3471#else
3472 for (uint32_t i = 0; i < cLabels; i++)
3473 if ( paLabels[i].enmType == enmType
3474 && paLabels[i].uData == uData)
3475 {
3476 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3477 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3478 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3479 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3480 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3481 return i;
3482 }
3483 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3484 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3485#endif
3486 }
3487
3488 /*
3489 * Make sure we've got room for another label.
3490 */
3491 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3492 { /* likely */ }
3493 else
3494 {
3495 uint32_t cNew = pReNative->cLabelsAlloc;
3496 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3497 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3498 cNew *= 2;
        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3500 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3501 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3502 pReNative->paLabels = paLabels;
3503 pReNative->cLabelsAlloc = cNew;
3504 }
3505
3506 /*
3507 * Define a new label.
3508 */
3509 paLabels[cLabels].off = offWhere;
3510 paLabels[cLabels].enmType = enmType;
3511 paLabels[cLabels].uData = uData;
3512 pReNative->cLabels = cLabels + 1;
3513
3514 Assert((unsigned)enmType < 64);
3515 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3516
3517 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3518 {
3519 Assert(uData == 0);
3520 pReNative->aidxUniqueLabels[enmType] = cLabels;
3521 }
3522
3523 if (offWhere != UINT32_MAX)
3524 {
3525#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3526 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3527 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3528#endif
3529 }
3530 return cLabels;
3531}
3532
3533
3534/**
3535 * Defines the location of an existing label.
3536 *
3537 * @param pReNative The native recompile state.
3538 * @param idxLabel The label to define.
3539 * @param offWhere The position.
3540 */
3541DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3542{
3543 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3544 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3545 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3546 pLabel->off = offWhere;
3547#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3548 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3549 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3550#endif
3551}
3552
3553
3554/**
 * Looks up a label.
3556 *
3557 * @returns Label ID if found, UINT32_MAX if not.
3558 */
3559static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3560 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3561{
3562 Assert((unsigned)enmType < 64);
3563 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3564 {
3565 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3566 return pReNative->aidxUniqueLabels[enmType];
3567
3568 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3569 uint32_t const cLabels = pReNative->cLabels;
3570 for (uint32_t i = 0; i < cLabels; i++)
3571 if ( paLabels[i].enmType == enmType
3572 && paLabels[i].uData == uData
3573 && ( paLabels[i].off == offWhere
3574 || offWhere == UINT32_MAX
3575 || paLabels[i].off == UINT32_MAX))
3576 return i;
3577 }
3578 return UINT32_MAX;
3579}
3580
3581
3582/**
3583 * Adds a fixup.
3584 *
3585 * @throws VBox status code (int) on failure.
3586 * @param pReNative The native recompile state.
3587 * @param offWhere The instruction offset of the fixup location.
3588 * @param idxLabel The target label ID for the fixup.
3589 * @param enmType The fixup type.
3590 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3591 */
3592DECL_HIDDEN_THROW(void)
3593iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3594 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3595{
3596 Assert(idxLabel <= UINT16_MAX);
3597 Assert((unsigned)enmType <= UINT8_MAX);
3598#ifdef RT_ARCH_ARM64
3599 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3600 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3601 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3602#endif
3603
3604 /*
     * Make sure we've got room.
3606 */
3607 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3608 uint32_t const cFixups = pReNative->cFixups;
3609 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3610 { /* likely */ }
3611 else
3612 {
3613 uint32_t cNew = pReNative->cFixupsAlloc;
3614 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3615 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3616 cNew *= 2;
3617 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3618 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3619 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3620 pReNative->paFixups = paFixups;
3621 pReNative->cFixupsAlloc = cNew;
3622 }
3623
3624 /*
3625 * Add the fixup.
3626 */
3627 paFixups[cFixups].off = offWhere;
3628 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3629 paFixups[cFixups].enmType = enmType;
3630 paFixups[cFixups].offAddend = offAddend;
3631 pReNative->cFixups = cFixups + 1;
3632}
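
/*
 * Illustrative sketch (kept out of the build) of the label/fixup workflow provided
 * by iemNativeLabelCreate, iemNativeAddFixup and iemNativeLabelDefine above.  The
 * function name is invented and the label/fixup types are passed in as parameters
 * rather than naming concrete enum values; the branch emission itself is elided.
 */
#if 0 /* illustrative usage sketch, not compiled */
static uint32_t iemNativeSketchForwardBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                             IEMNATIVELABELTYPE enmLabelType, IEMNATIVEFIXUPTYPE enmFixupType)
{
    /* 1. Create a forward label; its position is not known yet (stays UINT32_MAX). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);
    /* 2. Emit the branch instruction here (elided), then record a fixup against the
          label so the target offset can be patched in later. */
    iemNativeAddFixup(pReNative, off, idxLabel, enmFixupType);
    /* 3. When code emission reaches the target position, define the label; the
          recorded fixups are applied when the TB is finalized. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
    return off;
}
#endif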
3633
3634
3635/**
3636 * Slow code path for iemNativeInstrBufEnsure.
3637 */
3638DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3639{
3640 /* Double the buffer size till we meet the request. */
3641 uint32_t cNew = pReNative->cInstrBufAlloc;
3642 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3643 do
3644 cNew *= 2;
3645 while (cNew < off + cInstrReq);
3646
3647 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3648#ifdef RT_ARCH_ARM64
3649 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3650#else
3651 uint32_t const cbMaxInstrBuf = _2M;
3652#endif
3653 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3654
3655 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3656 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3657
3658#ifdef VBOX_STRICT
3659 pReNative->offInstrBufChecked = off + cInstrReq;
3660#endif
3661 pReNative->cInstrBufAlloc = cNew;
3662 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3663}
3664
3665#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3666
3667/**
3668 * Grows the static debug info array used during recompilation.
3669 *
3670 * @returns Pointer to the new debug info block; throws VBox status code on
3671 * failure, so no need to check the return value.
3672 */
3673DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3674{
3675 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3676 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3677 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3678 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3679 pReNative->pDbgInfo = pDbgInfo;
3680 pReNative->cDbgInfoAlloc = cNew;
3681 return pDbgInfo;
3682}
3683
3684
3685/**
 * Adds a new uninitialized debug info entry, returning the pointer to it.
3687 */
3688DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3689{
3690 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3691 { /* likely */ }
3692 else
3693 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3694 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3695}
3696
3697
3698/**
3699 * Debug Info: Adds a native offset record, if necessary.
3700 */
3701DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3702{
3703 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3704
3705 /*
3706 * Search backwards to see if we've got a similar record already.
3707 */
3708 uint32_t idx = pDbgInfo->cEntries;
3709 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3710 while (idx-- > idxStop)
3711 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3712 {
3713 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3714 return;
3715 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3716 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3717 break;
3718 }
3719
3720 /*
3721 * Add it.
3722 */
3723 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3724 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3725 pEntry->NativeOffset.offNative = off;
3726}
3727
3728
3729/**
3730 * Debug Info: Record info about a label.
3731 */
3732static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3733{
3734 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3735 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3736 pEntry->Label.uUnused = 0;
3737 pEntry->Label.enmLabel = (uint8_t)enmType;
3738 pEntry->Label.uData = uData;
3739}
3740
3741
3742/**
3743 * Debug Info: Record info about a threaded call.
3744 */
3745static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3746{
3747 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3748 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3749 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3750 pEntry->ThreadedCall.uUnused = 0;
3751 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3752}
3753
3754
3755/**
3756 * Debug Info: Record info about a new guest instruction.
3757 */
3758static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3759{
3760 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3761 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3762 pEntry->GuestInstruction.uUnused = 0;
3763 pEntry->GuestInstruction.fExec = fExec;
3764}
3765
3766
3767/**
3768 * Debug Info: Record info about guest register shadowing.
3769 */
3770DECL_HIDDEN_THROW(void)
3771iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3772 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3773{
3774 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3775 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3776 pEntry->GuestRegShadowing.uUnused = 0;
3777 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3778 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3779 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3780}
3781
3782
3783# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3784/**
 * Debug Info: Record info about guest SIMD register shadowing.
3786 */
3787DECL_HIDDEN_THROW(void)
3788iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3789 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3790{
3791 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3792 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3793 pEntry->GuestSimdRegShadowing.uUnused = 0;
3794 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3795 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3796 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3797}
3798# endif
3799
3800
3801# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3802/**
3803 * Debug Info: Record info about delayed RIP updates.
3804 */
3805DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3806{
3807 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3808 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3809 pEntry->DelayedPcUpdate.offPc = offPc;
3810 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3811}
3812# endif
3813
3814#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3815
3816
3817/*********************************************************************************************************************************
3818* Register Allocator *
3819*********************************************************************************************************************************/
3820
3821/**
3822 * Register parameter indexes (indexed by argument number).
3823 */
3824DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3825{
3826 IEMNATIVE_CALL_ARG0_GREG,
3827 IEMNATIVE_CALL_ARG1_GREG,
3828 IEMNATIVE_CALL_ARG2_GREG,
3829 IEMNATIVE_CALL_ARG3_GREG,
3830#if defined(IEMNATIVE_CALL_ARG4_GREG)
3831 IEMNATIVE_CALL_ARG4_GREG,
3832# if defined(IEMNATIVE_CALL_ARG5_GREG)
3833 IEMNATIVE_CALL_ARG5_GREG,
3834# if defined(IEMNATIVE_CALL_ARG6_GREG)
3835 IEMNATIVE_CALL_ARG6_GREG,
3836# if defined(IEMNATIVE_CALL_ARG7_GREG)
3837 IEMNATIVE_CALL_ARG7_GREG,
3838# endif
3839# endif
3840# endif
3841#endif
3842};
3843AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3844
3845/**
3846 * Call register masks indexed by argument count.
3847 */
3848DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3849{
3850 0,
3851 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3852 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3853 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3854 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3855 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3856#if defined(IEMNATIVE_CALL_ARG4_GREG)
3857 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3858 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3859# if defined(IEMNATIVE_CALL_ARG5_GREG)
3860 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3861 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3862# if defined(IEMNATIVE_CALL_ARG6_GREG)
3863 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3864 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3865 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3866# if defined(IEMNATIVE_CALL_ARG7_GREG)
3867 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3868 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3869 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3870# endif
3871# endif
3872# endif
3873#endif
3874};
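
/*
 * Illustrative sketch (kept out of the build) of how the two call register tables
 * above can be consulted; the local and function names are invented.  cArgs must
 * not exceed IEMNATIVE_CALL_ARG_GREG_COUNT.
 */
#if 0 /* illustrative usage sketch, not compiled */
static void iemNativeSketchCallRegTables(uint8_t cArgs)
{
    uint8_t  const idxArg2Reg  = g_aidxIemNativeCallRegs[2];   /* host GPR carrying argument no. 2 */
    uint32_t const fArgRegMask = g_afIemNativeCallRegs[cArgs]; /* mask of all GPRs used for cArgs arguments */
    RT_NOREF(idxArg2Reg, fArgRegMask);
}
#endif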
3875
3876#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3877/**
3878 * BP offset of the stack argument slots.
3879 *
3880 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3881 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3882 */
3883DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3884{
3885 IEMNATIVE_FP_OFF_STACK_ARG0,
3886# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3887 IEMNATIVE_FP_OFF_STACK_ARG1,
3888# endif
3889# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3890 IEMNATIVE_FP_OFF_STACK_ARG2,
3891# endif
3892# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3893 IEMNATIVE_FP_OFF_STACK_ARG3,
3894# endif
3895};
3896AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3897#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3898
3899/**
3900 * Info about shadowed guest register values.
3901 * @see IEMNATIVEGSTREG
3902 */
3903DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3904{
3905#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3906 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3907 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3908 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3909 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3910 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3911 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3912 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3913 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3914 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3915 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3916 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3917 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3918 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3919 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3920 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3921 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3922 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3923 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3924 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3925 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3926 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3927 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3928 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3929 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3930 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3931 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3932 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3933 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3934 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3935 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3936 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3937 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3938 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3939 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3940 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3941 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3942 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3943 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3944 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3945 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3946 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3947 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3948 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3949 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3950 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3951 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3952 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3953 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3954#undef CPUMCTX_OFF_AND_SIZE
3955};
3956AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
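
/*
 * Illustrative sketch (kept out of the build) of how g_aGstShadowInfo is typically
 * consulted; only the pszName and cb members are used here, the function name is
 * invented and the cast merely avoids format warnings.
 */
#if 0 /* illustrative usage sketch, not compiled */
static void iemNativeSketchLogGstReg(IEMNATIVEGSTREG enmGstReg)
{
    Log12(("guest reg %s, %u bytes\n", g_aGstShadowInfo[enmGstReg].pszName, (unsigned)g_aGstShadowInfo[enmGstReg].cb));
}
#endif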
3957
3958
3959/** Host CPU general purpose register names. */
3960DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3961{
3962#ifdef RT_ARCH_AMD64
3963 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3964#elif RT_ARCH_ARM64
3965 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3966 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3967#else
3968# error "port me"
3969#endif
3970};
3971
3972
3973#if 0 /* unused */
3974/**
3975 * Tries to locate a suitable register in the given register mask.
3976 *
3977 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3978 * failed.
3979 *
3980 * @returns Host register number on success, returns UINT8_MAX on failure.
3981 */
3982static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3983{
3984 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3985 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3986 if (fRegs)
3987 {
3988 /** @todo pick better here: */
3989 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3990
3991 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3992 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3993 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3994 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3995
3996 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3997 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3998 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3999 return idxReg;
4000 }
4001 return UINT8_MAX;
4002}
4003#endif /* unused */
4004
4005
4006/**
4007 * Locate a register, possibly freeing one up.
4008 *
4009 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4010 * failed.
4011 *
4012 * @returns Host register number on success. Returns UINT8_MAX if no registers
 *          are found; the caller is supposed to deal with this and raise an
 *          allocation type specific status code (if desired).
4015 *
 * @throws  VBox status code if we run into trouble spilling a variable or
 *          recording debug info.  Does NOT throw anything if we're out of
4018 * registers, though.
4019 */
4020static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4021 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
4022{
4023 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4024 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4025 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4026
4027 /*
4028 * Try a freed register that's shadowing a guest register.
4029 */
4030 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4031 if (fRegs)
4032 {
4033 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4034
4035#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4036 /*
         * When we have liveness information, we use it to kick out all shadowed
         * guest registers that will not be needed any more in this TB.  If we're
4039 * lucky, this may prevent us from ending up here again.
4040 *
4041 * Note! We must consider the previous entry here so we don't free
4042 * anything that the current threaded function requires (current
4043 * entry is produced by the next threaded function).
4044 */
4045 uint32_t const idxCurCall = pReNative->idxCurCall;
4046 if (idxCurCall > 0)
4047 {
4048 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4049
4050# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4051 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4052 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4053 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
# else
            /* Construct a mask of the registers not in the read or write state.
               Note! We could skip writes, if they aren't from us, as this is just
4057 a hack to prevent trashing registers that have just been written
4058 or will be written when we retire the current instruction. */
4059 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4060 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4061 & IEMLIVENESSBIT_MASK;
# endif
4063 /* Merge EFLAGS. */
4064 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
4065 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
4066 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
4067 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
4068 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
4069
4070 /* If it matches any shadowed registers. */
4071 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4072 {
4073 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
4074 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4075 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4076
4077 /* See if we've got any unshadowed registers we can return now. */
4078 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4079 if (fUnshadowedRegs)
4080 {
4081 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4082 return (fPreferVolatile
4083 ? ASMBitFirstSetU32(fUnshadowedRegs)
4084 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4085 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4086 - 1;
4087 }
4088 }
4089 }
4090#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4091
4092 unsigned const idxReg = (fPreferVolatile
4093 ? ASMBitFirstSetU32(fRegs)
4094 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4095 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4096 - 1;
4097
4098 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4099 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4100 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4101 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4102
4103 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4104 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4105 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4106 return idxReg;
4107 }
4108
4109 /*
4110 * Try free up a variable that's in a register.
4111 *
4112 * We do two rounds here, first evacuating variables we don't need to be
4113 * saved on the stack, then in the second round move things to the stack.
4114 */
4115 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4116 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4117 {
4118 uint32_t fVars = pReNative->Core.bmVars;
4119 while (fVars)
4120 {
4121 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4122 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4123 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4124 && (RT_BIT_32(idxReg) & fRegMask)
4125 && ( iLoop == 0
4126 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4127 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4128 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4129 {
4130 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4131 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4132 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4133 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4134 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4135 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4136
4137 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4138 {
4139 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4140 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4141 }
4142
4143 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4144 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4145
4146 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4147 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4148 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4149 return idxReg;
4150 }
4151 fVars &= ~RT_BIT_32(idxVar);
4152 }
4153 }
4154
4155 return UINT8_MAX;
4156}
4157
4158
4159/**
4160 * Reassigns a variable to a different register specified by the caller.
4161 *
4162 * @returns The new code buffer position.
4163 * @param pReNative The native recompile state.
4164 * @param off The current code buffer position.
4165 * @param idxVar The variable index.
4166 * @param idxRegOld The old host register number.
4167 * @param idxRegNew The new host register number.
4168 * @param pszCaller The caller for logging.
4169 */
4170static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4171 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4172{
4173 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4174 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4175#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4176 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4177#endif
4178 RT_NOREF(pszCaller);
4179
4180 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4181
4182 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4183 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4184 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4185 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4186
4187 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4188 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4189 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4190 if (fGstRegShadows)
4191 {
4192 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4193 | RT_BIT_32(idxRegNew);
4194 while (fGstRegShadows)
4195 {
4196 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4197 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4198
4199 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4200 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4201 }
4202 }
4203
4204 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4205 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4206 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4207 return off;
4208}
4209
4210
4211/**
4212 * Moves a variable to a different register or spills it onto the stack.
4213 *
4214 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4215 * kinds can easily be recreated if needed later.
4216 *
4217 * @returns The new code buffer position.
4218 * @param pReNative The native recompile state.
4219 * @param off The current code buffer position.
4220 * @param idxVar The variable index.
4221 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4222 * call-volatile registers.
4223 */
4224DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4225 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4226{
4227 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4228 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4229 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4230 Assert(!pVar->fRegAcquired);
4231
4232 uint8_t const idxRegOld = pVar->idxReg;
4233 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4234 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4235 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4236 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4237 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4238 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4239 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4240 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4241
4242
4243 /** @todo Add statistics on this.*/
4244 /** @todo Implement basic variable liveness analysis (python) so variables
     * can be freed immediately once no longer used.  Without it we risk trashing
     * registers and stack for dead variables.
4247 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4248
4249 /*
4250 * First try move it to a different register, as that's cheaper.
4251 */
4252 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4253 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4254 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4255 if (fRegs)
4256 {
4257 /* Avoid using shadow registers, if possible. */
4258 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4259 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4260 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4261 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4262 }
4263
4264 /*
4265 * Otherwise we must spill the register onto the stack.
4266 */
4267 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4268 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4269 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4270 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4271
4272 pVar->idxReg = UINT8_MAX;
4273 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4274 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4275 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4276 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4277 return off;
4278}
4279
4280
4281/**
4282 * Allocates a temporary host general purpose register.
4283 *
4284 * This may emit code to save register content onto the stack in order to free
4285 * up a register.
4286 *
4287 * @returns The host register number; throws VBox status code on failure,
4288 * so no need to check the return value.
4289 * @param pReNative The native recompile state.
4290 * @param poff Pointer to the variable with the code buffer position.
 *                      This will be updated if we need to move a variable from
4292 * register to stack in order to satisfy the request.
4293 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4294 * registers (@c true, default) or the other way around
4295 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4296 */
4297DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4298{
4299 /*
4300 * Try find a completely unused register, preferably a call-volatile one.
4301 */
4302 uint8_t idxReg;
4303 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4304 & ~pReNative->Core.bmHstRegsWithGstShadow
4305 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4306 if (fRegs)
4307 {
4308 if (fPreferVolatile)
4309 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4310 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4311 else
4312 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4313 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4314 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4315 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4316 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4317 }
4318 else
4319 {
4320 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4321 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4322 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4323 }
4324 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4325}
4326
4327
4328/**
4329 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4330 * registers.
4331 *
4332 * @returns The host register number; throws VBox status code on failure,
4333 * so no need to check the return value.
4334 * @param pReNative The native recompile state.
4335 * @param poff Pointer to the variable with the code buffer position.
 *                      This will be updated if we need to move a variable from
4337 * register to stack in order to satisfy the request.
4338 * @param fRegMask Mask of acceptable registers.
4339 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4340 * registers (@c true, default) or the other way around
4341 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4342 */
4343DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4344 bool fPreferVolatile /*= true*/)
4345{
4346 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4347 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4348
4349 /*
4350 * Try find a completely unused register, preferably a call-volatile one.
4351 */
4352 uint8_t idxReg;
4353 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4354 & ~pReNative->Core.bmHstRegsWithGstShadow
4355 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4356 & fRegMask;
4357 if (fRegs)
4358 {
4359 if (fPreferVolatile)
4360 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4361 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4362 else
4363 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4364 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4365 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4366 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4367 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4368 }
4369 else
4370 {
4371 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4372 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4373 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4374 }
4375 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4376}
4377
4378
4379/**
4380 * Allocates a temporary register for loading an immediate value into.
4381 *
4382 * This will emit code to load the immediate, unless there happens to be an
4383 * unused register with the value already loaded.
4384 *
4385 * The caller will not modify the returned register, it must be considered
4386 * read-only. Free using iemNativeRegFreeTmpImm.
4387 *
4388 * @returns The host register number; throws VBox status code on failure, so no
4389 * need to check the return value.
4390 * @param pReNative The native recompile state.
4391 * @param poff Pointer to the variable with the code buffer position.
4392 * @param uImm The immediate value that the register must hold upon
4393 * return.
4394 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4395 * registers (@c true, default) or the other way around
4396 * (@c false).
4397 *
4398 * @note Reusing immediate values has not been implemented yet.
4399 */
4400DECL_HIDDEN_THROW(uint8_t)
4401iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4402{
4403 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4404 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4405 return idxReg;
4406}
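
/*
 * Illustrative sketch (kept out of the build) of the allocate/use/free pattern for
 * the temporary register helpers above.  The function name and the immediate value
 * are invented; iemNativeRegFreeTmpImm is the release function named by the doc
 * comment above, and its exact signature is assumed here.
 */
#if 0 /* illustrative usage sketch, not compiled */
static uint32_t iemNativeSketchUseTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xdeadbeef));
    /* ... emit code that reads idxTmpReg here; it must be treated as read-only ... */
    iemNativeRegFreeTmpImm(pReNative, idxTmpReg);
    return off;
}
#endif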
4407
4408
4409/**
4410 * Allocates a temporary host general purpose register for keeping a guest
4411 * register value.
4412 *
4413 * Since we may already have a register holding the guest register value,
4414 * code will be emitted to do the loading if that's not the case. Code may also
 * be emitted if we have to free up a register to satisfy the request.
4416 *
4417 * @returns The host register number; throws VBox status code on failure, so no
4418 * need to check the return value.
4419 * @param pReNative The native recompile state.
4420 * @param poff Pointer to the variable with the code buffer
 *                          position.  This will be updated if we need to move a
4422 * variable from register to stack in order to satisfy
4423 * the request.
 * @param   enmGstReg       The guest register that is to be updated.
4425 * @param enmIntendedUse How the caller will be using the host register.
4426 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4427 * register is okay (default). The ASSUMPTION here is
4428 * that the caller has already flushed all volatile
4429 * registers, so this is only applied if we allocate a
4430 * new register.
4431 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4432 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
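 *
 * Hypothetical usage sketch; the argument pattern is an assumption based on the
 * parameter descriptions above and the guest register enum layout:
 * @code
 *      // Get a host register shadowing guest RAX for read-modify-write use.
 *      uint8_t const idxHstReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code that updates idxHstReg ...
 * @endcode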
4433 */
4434DECL_HIDDEN_THROW(uint8_t)
4435iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4436 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4437 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4438{
4439 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4440#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4441 AssertMsg( fSkipLivenessAssert
4442 || pReNative->idxCurCall == 0
4443 || enmGstReg == kIemNativeGstReg_Pc
4444 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4445 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4446 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4447 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4448 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4449 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4450#endif
4451 RT_NOREF(fSkipLivenessAssert);
4452#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4453 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4454#endif
4455 uint32_t const fRegMask = !fNoVolatileRegs
4456 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4457 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4458
4459 /*
4460 * First check if the guest register value is already in a host register.
4461 */
4462 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4463 {
4464 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4465 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4466 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4467 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4468
4469 /* It's not supposed to be allocated... */
4470 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4471 {
4472 /*
4473 * If the register will trash the guest shadow copy, try find a
4474 * completely unused register we can use instead. If that fails,
4475 * we need to disassociate the host reg from the guest reg.
4476 */
4477 /** @todo would be nice to know if preserving the register is in any way helpful. */
4478 /* If the purpose is calculations, try to duplicate the register value as
4479 we'll be clobbering the shadow. */
4480 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4481 && ( ~pReNative->Core.bmHstRegs
4482 & ~pReNative->Core.bmHstRegsWithGstShadow
4483 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4484 {
4485 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4486
4487 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4488
4489 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4490 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4491 g_apszIemNativeHstRegNames[idxRegNew]));
4492 idxReg = idxRegNew;
4493 }
4494 /* If the current register matches the restrictions, go ahead and allocate
4495 it for the caller. */
4496 else if (fRegMask & RT_BIT_32(idxReg))
4497 {
4498 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4499 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4500 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4501 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4502 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4503 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4504 else
4505 {
4506 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4507 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4508 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4509 }
4510 }
4511 /* Otherwise, allocate a register that satisfies the caller and transfer
4512 the shadowing if compatible with the intended use. (This basically
4513 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4514 else
4515 {
4516 Assert(fNoVolatileRegs);
4517 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4518 !fNoVolatileRegs
4519 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4520 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4521 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4522 {
4523 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4524 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4525 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4526 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4527 }
4528 else
4529 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4530 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4531 g_apszIemNativeHstRegNames[idxRegNew]));
4532 idxReg = idxRegNew;
4533 }
4534 }
4535 else
4536 {
4537 /*
4538 * Oops. Shadowed guest register already allocated!
4539 *
4540 * Allocate a new register, copy the value and, if updating, the
4541 * guest shadow copy assignment to the new register.
4542 */
4543 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4544 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4545 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4546 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4547
4548 /** @todo share register for readonly access. */
4549 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4550 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4551
4552 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4553 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4554
4555 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4556 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4557 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4558 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4559 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4560 else
4561 {
4562 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4563 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4564 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4565 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4566 }
4567 idxReg = idxRegNew;
4568 }
4569 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4570
4571#ifdef VBOX_STRICT
4572 /* Strict builds: Check that the value is correct. */
4573 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4574#endif
4575
4576 return idxReg;
4577 }
4578
4579 /*
4580 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4581 */
4582 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4583
4584 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4585 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4586
4587 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4588 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4589 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4590 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4591
4592 return idxRegNew;
4593}
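
/* Illustrative usage sketch (editorial addition, not part of the original source): a
   typical ForUpdate allocation.  The returned host register keeps shadowing the guest
   register, so the modification made by the emitted code is picked up by the normal
   flushing machinery later; only identifiers already used in this file appear below. */
#if 0
static uint32_t iemNativeExampleUpdateGuestPc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    /* ... emit code that modifies idxPcReg here ... */
    iemNativeRegFreeTmp(pReNative, idxPcReg); /* The shadow association stays intact. */
    return off;
}
#endif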
4594
4595
4596/**
4597 * Allocates a temporary host general purpose register that already holds the
4598 * given guest register value.
4599 *
4600 * The use case for this function is places where the shadowing state cannot be
4601 * modified due to branching and such. This will fail if we don't have a
4602 * current shadow copy handy or if it's incompatible. The only code that will
4603 * be emitted here is value checking code in strict builds.
4604 *
4605 * The intended use can only be readonly!
4606 *
4607 * @returns The host register number, UINT8_MAX if not present.
4608 * @param pReNative The native recompile state.
4609 * @param poff Pointer to the instruction buffer offset.
4610 * Will be updated in strict builds if a register is
4611 * found.
4612 * @param enmGstReg The guest register that is to be read.
4613 * @note In strict builds, this may throw instruction buffer growth failures.
4614 * Non-strict builds will not throw anything.
4615 * @sa iemNativeRegAllocTmpForGuestReg
4616 */
4617DECL_HIDDEN_THROW(uint8_t)
4618iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4619{
4620 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4621#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4622 AssertMsg( pReNative->idxCurCall == 0
4623 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4624 || enmGstReg == kIemNativeGstReg_Pc,
4625 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4626#endif
4627
4628 /*
4629 * First check if the guest register value is already in a host register.
4630 */
4631 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4632 {
4633 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4634 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4635 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4636 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4637
4638 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4639 {
4640 /*
4641 * We only do readonly use here, so easy compared to the other
4642 * variant of this code.
4643 */
4644 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4645 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4646 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4647 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4648 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4649
4650#ifdef VBOX_STRICT
4651 /* Strict builds: Check that the value is correct. */
4652 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4653#else
4654 RT_NOREF(poff);
4655#endif
4656 return idxReg;
4657 }
4658 }
4659
4660 return UINT8_MAX;
4661}
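
/* Illustrative usage sketch (editorial addition, not part of the original source): probing
   for an existing shadow copy.  A UINT8_MAX return just means there is no usable copy and
   the caller takes a fallback path that leaves the shadowing state alone. */
#if 0
static uint32_t iemNativeExampleReadPcIfShadowed(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... emit code that only reads idxPcReg here ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    /* else: fall back to a path that does not rely on a shadow copy. */
    return off;
}
#endif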
4662
4663
4664/**
4665 * Allocates argument registers for a function call.
4666 *
4667 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4668 * need to check the return value.
4669 * @param pReNative The native recompile state.
4670 * @param off The current code buffer offset.
4671 * @param cArgs The number of arguments the function call takes.
4672 */
4673DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4674{
4675 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4676 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4677 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4678 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4679
4680 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4681 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4682 else if (cArgs == 0)
4683 return off;
4684
4685 /*
4686 * Do we get lucky and all registers are free and not shadowing anything?
4687 */
4688 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4689 for (uint32_t i = 0; i < cArgs; i++)
4690 {
4691 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4692 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4693 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4694 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4695 }
4696 /*
4697 * Okay, not lucky so we have to free up the registers.
4698 */
4699 else
4700 for (uint32_t i = 0; i < cArgs; i++)
4701 {
4702 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4703 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4704 {
4705 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4706 {
4707 case kIemNativeWhat_Var:
4708 {
4709 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4710 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4711 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4712 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4713 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4714#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4715 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4716#endif
4717
4718 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4719 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4720 else
4721 {
4722 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4723 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4724 }
4725 break;
4726 }
4727
4728 case kIemNativeWhat_Tmp:
4729 case kIemNativeWhat_Arg:
4730 case kIemNativeWhat_rc:
4731 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4732 default:
4733 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4734 }
4735
4736 }
4737 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4738 {
4739 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4740 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4741 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4742 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4743 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4744 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4745 }
4746 else
4747 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4748 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4749 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4750 }
4751 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4752 return off;
4753}
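
/* Illustrative usage sketch (editorial addition, not part of the original source): reserving
   the first two argument registers per the documented contract before loading them.  The
   IEMNATIVE_CALL_ARG0_GREG/IEMNATIVE_CALL_ARG1_GREG loads are left as a placeholder. */
#if 0
static uint32_t iemNativeExampleReserveCallArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
    /* ... emit code loading the IEMNATIVE_CALL_ARG0_GREG and IEMNATIVE_CALL_ARG1_GREG registers ... */
    return off;
}
#endif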
4754
4755
4756DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4757
4758
4759#if 0
4760/**
4761 * Frees a register assignment of any type.
4762 *
4763 * @param pReNative The native recompile state.
4764 * @param idxHstReg The register to free.
4765 *
4766 * @note Does not update variables.
4767 */
4768DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4769{
4770 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4771 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4772 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4773 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4774 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4775 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4776 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4777 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4778 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4779 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4780 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4781 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4782 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4783 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4784
4785 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4786 /* no flushing, right:
4787 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4788 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4789 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4790 */
4791}
4792#endif
4793
4794
4795/**
4796 * Frees a temporary register.
4797 *
4798 * Any shadow copies of guest registers assigned to the host register will not
4799 * be flushed by this operation.
4800 */
4801DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4802{
4803 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4804 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4805 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4806 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4807 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4808}
4809
4810
4811/**
4812 * Frees a temporary immediate register.
4813 *
4814 * It is assumed that the caller has not modified the register, so it still holds
4815 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4816 */
4817DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4818{
4819 iemNativeRegFreeTmp(pReNative, idxHstReg);
4820}
4821
4822
4823/**
4824 * Frees a register assigned to a variable.
4825 *
4826 * The register will be disassociated from the variable.
4827 */
4828DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4829{
4830 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4831 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4832 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4833 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4834 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4835#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4836 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4837#endif
4838
4839 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4840 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4841 if (!fFlushShadows)
4842 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4843 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4844 else
4845 {
4846 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4847 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4848 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4849 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4850 uint64_t fGstRegShadows = fGstRegShadowsOld;
4851 while (fGstRegShadows)
4852 {
4853 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4854 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4855
4856 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4857 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4858 }
4859 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4860 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4861 }
4862}
4863
4864
4865#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4866# ifdef LOG_ENABLED
4867/** Host CPU SIMD register names. */
4868DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4869{
4870# ifdef RT_ARCH_AMD64
4871 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4872# elif defined(RT_ARCH_ARM64)
4873 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4874 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4875# else
4876# error "port me"
4877# endif
4878};
4879# endif
4880
4881
4882/**
4883 * Frees a SIMD register assigned to a variable.
4884 *
4885 * The register will be disassociated from the variable.
4886 */
4887DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4888{
4889 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4890 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4891 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4892 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4893 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4894 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4895
4896 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4897 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4898 if (!fFlushShadows)
4899 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4900 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4901 else
4902 {
4903 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4904 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4905 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4906 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4907 uint64_t fGstRegShadows = fGstRegShadowsOld;
4908 while (fGstRegShadows)
4909 {
4910 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4911 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4912
4913 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4914 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4915 }
4916 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4917 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4918 }
4919}
4920#endif
4921
4922
4923/**
4924 * Called right before emitting a call instruction to move anything important
4925 * out of call-volatile registers, free and flush the call-volatile registers,
4926 * optionally freeing argument variables.
4927 *
4928 * @returns New code buffer offset; throws VBox status code on failure.
4929 * @param pReNative The native recompile state.
4930 * @param off The code buffer offset.
4931 * @param cArgs The number of arguments the function call takes.
4932 * It is presumed that the host register part of these has
4933 * been allocated as such already and won't need moving,
4934 * just freeing.
4935 * @param fKeepVars Mask of variables that should keep their register
4936 * assignments. Caller must take care to handle these.
4937 */
4938DECL_HIDDEN_THROW(uint32_t)
4939iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4940{
4941 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4942
4943 /* fKeepVars will reduce this mask. */
4944 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4945
4946 /*
4947 * Move anything important out of volatile registers.
4948 */
4949 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4950 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4951 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4952#ifdef IEMNATIVE_REG_FIXED_TMP0
4953 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4954#endif
4955#ifdef IEMNATIVE_REG_FIXED_TMP1
4956 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4957#endif
4958#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4959 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4960#endif
4961 & ~g_afIemNativeCallRegs[cArgs];
4962
4963 fRegsToMove &= pReNative->Core.bmHstRegs;
4964 if (!fRegsToMove)
4965 { /* likely */ }
4966 else
4967 {
4968 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4969 while (fRegsToMove != 0)
4970 {
4971 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4972 fRegsToMove &= ~RT_BIT_32(idxReg);
4973
4974 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4975 {
4976 case kIemNativeWhat_Var:
4977 {
4978 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4979 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4980 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4981 Assert(pVar->idxReg == idxReg);
4982 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4983 {
4984 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4985 idxVar, pVar->enmKind, pVar->idxReg));
4986 if (pVar->enmKind != kIemNativeVarKind_Stack)
4987 pVar->idxReg = UINT8_MAX;
4988 else
4989 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4990 }
4991 else
4992 fRegsToFree &= ~RT_BIT_32(idxReg);
4993 continue;
4994 }
4995
4996 case kIemNativeWhat_Arg:
4997 AssertMsgFailed(("What?!?: %u\n", idxReg));
4998 continue;
4999
5000 case kIemNativeWhat_rc:
5001 case kIemNativeWhat_Tmp:
5002 AssertMsgFailed(("Missing free: %u\n", idxReg));
5003 continue;
5004
5005 case kIemNativeWhat_FixedTmp:
5006 case kIemNativeWhat_pVCpuFixed:
5007 case kIemNativeWhat_pCtxFixed:
5008 case kIemNativeWhat_PcShadow:
5009 case kIemNativeWhat_FixedReserved:
5010 case kIemNativeWhat_Invalid:
5011 case kIemNativeWhat_End:
5012 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5013 }
5014 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5015 }
5016 }
5017
5018 /*
5019 * Do the actual freeing.
5020 */
5021 if (pReNative->Core.bmHstRegs & fRegsToFree)
5022 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
5023 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
5024 pReNative->Core.bmHstRegs &= ~fRegsToFree;
5025
5026 /* If there are guest register shadows in any call-volatile register, we
5027 have to clear the corresponding guest register masks for each register. */
5028 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
5029 if (fHstRegsWithGstShadow)
5030 {
5031 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5032 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
5033 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
5034 do
5035 {
5036 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
5037 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5038
5039 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
5040 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
5041 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
5042 } while (fHstRegsWithGstShadow != 0);
5043 }
5044
5045 return off;
5046}
5047
5048
5049/**
5050 * Flushes a set of guest register shadow copies.
5051 *
5052 * This is usually done after calling a threaded function or a C-implementation
5053 * of an instruction.
5054 *
5055 * @param pReNative The native recompile state.
5056 * @param fGstRegs Set of guest registers to flush.
5057 */
5058DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
5059{
5060 /*
5061 * Reduce the mask by what's currently shadowed
5062 */
5063 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
5064 fGstRegs &= bmGstRegShadowsOld;
5065 if (fGstRegs)
5066 {
5067 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
5068 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
5069 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
5070 if (bmGstRegShadowsNew)
5071 {
5072 /*
5073 * Partial.
5074 */
5075 do
5076 {
5077 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5078 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5079 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5080 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5081 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5082
5083 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5084 fGstRegs &= ~fInThisHstReg;
5085 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5086 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5087 if (!fGstRegShadowsNew)
5088 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5089 } while (fGstRegs != 0);
5090 }
5091 else
5092 {
5093 /*
5094 * Clear all.
5095 */
5096 do
5097 {
5098 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5099 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5100 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5101 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5102 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5103
5104 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5105 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5106 } while (fGstRegs != 0);
5107 pReNative->Core.bmHstRegsWithGstShadow = 0;
5108 }
5109 }
5110}
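
/* Illustrative usage sketch (editorial addition, not part of the original source): after a
   call to code that may change guest state behind our back, the affected shadow copies are
   simply dropped so later code reloads them from CPUMCTX. */
#if 0
static void iemNativeExampleDropPcShadow(PIEMRECOMPILERSTATE pReNative)
{
    /* The helper just called may have advanced the guest PC, so forget any cached copy. */
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
}
#endif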
5111
5112
5113/**
5114 * Flushes guest register shadow copies held by a set of host registers.
5115 *
5116 * This is used with the TLB lookup code for ensuring that we don't carry on
5117 * with any guest shadows in volatile registers, as these will get corrupted by
5118 * a TLB miss.
5119 *
5120 * @param pReNative The native recompile state.
5121 * @param fHstRegs Set of host registers to flush guest shadows for.
5122 */
5123DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5124{
5125 /*
5126 * Reduce the mask by what's currently shadowed.
5127 */
5128 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5129 fHstRegs &= bmHstRegsWithGstShadowOld;
5130 if (fHstRegs)
5131 {
5132 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5133 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5134 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5135 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5136 if (bmHstRegsWithGstShadowNew)
5137 {
5138 /*
5139 * Partial (likely).
5140 */
5141 uint64_t fGstShadows = 0;
5142 do
5143 {
5144 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5145 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5146 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5147 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5148
5149 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5150 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5151 fHstRegs &= ~RT_BIT_32(idxHstReg);
5152 } while (fHstRegs != 0);
5153 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5154 }
5155 else
5156 {
5157 /*
5158 * Clear all.
5159 */
5160 do
5161 {
5162 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5163 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5164 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5165 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5166
5167 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5168 fHstRegs &= ~RT_BIT_32(idxHstReg);
5169 } while (fHstRegs != 0);
5170 pReNative->Core.bmGstRegShadows = 0;
5171 }
5172 }
5173}
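
/* Illustrative usage sketch (editorial addition, not part of the original source): before a
   TLB lookup whose miss path calls a helper, the call-volatile host registers must not carry
   guest shadows, as explained above. */
#if 0
static void iemNativeExampleScrubVolatileShadows(PIEMRECOMPILERSTATE pReNative)
{
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
}
#endif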
5174
5175
5176/**
5177 * Restores guest shadow copies in volatile registers.
5178 *
5179 * This is used after calling a helper function (think TLB miss) to restore the
5180 * register state of volatile registers.
5181 *
5182 * @param pReNative The native recompile state.
5183 * @param off The code buffer offset.
5184 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5185 * be active (allocated) w/o asserting. Hack.
5186 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5187 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5188 */
5189DECL_HIDDEN_THROW(uint32_t)
5190iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5191{
5192 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5193 if (fHstRegs)
5194 {
5195 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5196 do
5197 {
5198 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5199
5200 /* It's not fatal if a register is active holding a variable that is
5201 shadowing a guest register, ASSUMING all pending guest register
5202 writes were flushed prior to the helper call. However, we'll be
5203 emitting duplicate restores, so it wastes code space. */
5204 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5205 RT_NOREF(fHstRegsActiveShadows);
5206
5207 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5208 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5209 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5210 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5211
5212 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5213 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5214
5215 fHstRegs &= ~RT_BIT_32(idxHstReg);
5216 } while (fHstRegs != 0);
5217 }
5218 return off;
5219}
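
/* Illustrative call-sequence sketch (editorial addition, not part of the original source):
   the three steps around a helper call discussed above - evacuate and free the call-volatile
   registers, emit the call, then re-establish the volatile shadow copies.  iemNativeEmitCallImm()
   is assumed to be the usual call emitter; treat it as a placeholder if the name differs. */
#if 0
static uint32_t iemNativeExampleCallHelper(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnHelper)
{
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/);
    off = iemNativeEmitCallImm(pReNative, off, pfnHelper);    /* assumed emitter */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
    return off;
}
#endif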
5220
5221
5222
5223
5224/*********************************************************************************************************************************
5225* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5226*********************************************************************************************************************************/
5227#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5228
5229/**
5230 * Info about shadowed guest SIMD register values.
5231 * @see IEMNATIVEGSTSIMDREG
5232 */
5233static struct
5234{
5235 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5236 uint32_t offXmm;
5237 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5238 uint32_t offYmm;
5239 /** Name (for logging). */
5240 const char *pszName;
5241} const g_aGstSimdShadowInfo[] =
5242{
5243#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5244 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5245 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5246 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5247 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5248 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5249 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5250 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5251 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5252 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5253 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5254 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5255 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5256 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5257 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5258 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5259 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5260 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5261#undef CPUMCTX_OFF_AND_SIZE
5262};
5263AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5264
5265
5266/**
5267 * Frees a temporary SIMD register.
5268 *
5269 * Any shadow copies of guest registers assigned to the host register will not
5270 * be flushed by this operation.
5271 */
5272DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5273{
5274 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5275 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5276 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5277 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5278 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5279}
5280
5281
5282/**
5283 * Emits code to flush a pending write of the given SIMD register if any, and also flushes the guest to host SIMD register association.
5284 *
5285 * @returns New code buffer offset.
5286 * @param pReNative The native recompile state.
5287 * @param off Current code buffer position.
5288 * @param enmGstSimdReg The guest SIMD register to flush.
5289 */
5290DECL_HIDDEN_THROW(uint32_t)
5291iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5292{
5293 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5294
5295 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5296 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5297 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5298 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5299
5300 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5301 {
5302 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5303 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5304 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5305 }
5306
5307 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5308 {
5309 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5310 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5311 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5312 }
5313
5314 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5315 return off;
5316}
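
/* Illustrative usage sketch (editorial addition, not part of the original source): a dirty
   shadow of a guest SIMD register has to be written back before external code inspects
   CPUMCTX; IEMNATIVEGSTSIMDREG_SIMD() is the macro used elsewhere in this file to name a
   guest ymm register by index. */
#if 0
static uint32_t iemNativeExampleFlushYmm0(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
    return off;
}
#endif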
5317
5318
5319/**
5320 * Locate a register, possibly freeing one up.
5321 *
5322 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5323 * failed.
5324 *
5325 * @returns Host register number on success. Returns UINT8_MAX if no registers
5326 * found, the caller is supposed to deal with this and raise an
5327 * allocation type specific status code (if desired).
5328 *
5329 * @throws VBox status code if we run into trouble spilling a variable or
5330 * recording debug info. Does NOT throw anything if we're out of
5331 * registers, though.
5332 */
5333static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5334 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5335{
5336 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5337 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5338 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5339
5340 /*
5341 * Try a freed register that's shadowing a guest register.
5342 */
5343 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5344 if (fRegs)
5345 {
5346 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5347
5348#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5349 /*
5350 * When we have liveness information, we use it to kick out all shadowed
5351 * guest registers that will not be needed any more in this TB. If we're
5352 * lucky, this may prevent us from ending up here again.
5353 *
5354 * Note! We must consider the previous entry here so we don't free
5355 * anything that the current threaded function requires (current
5356 * entry is produced by the next threaded function).
5357 */
5358 uint32_t const idxCurCall = pReNative->idxCurCall;
5359 if (idxCurCall > 0)
5360 {
5361 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5362
5363# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5364 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5365 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5366 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5367#else
5368 /* Construct a mask of the registers not in the read or write state.
5369 Note! We could skip writes, if they aren't from us, as this is just
5370 a hack to prevent trashing registers that have just been written
5371 or will be written when we retire the current instruction. */
5372 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5373 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5374 & IEMLIVENESSBIT_MASK;
5375#endif
5376 /* If it matches any shadowed registers. */
5377 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5378 {
5379 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5380 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5381 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5382
5383 /* See if we've got any unshadowed registers we can return now. */
5384 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5385 if (fUnshadowedRegs)
5386 {
5387 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5388 return (fPreferVolatile
5389 ? ASMBitFirstSetU32(fUnshadowedRegs)
5390 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5391 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5392 - 1;
5393 }
5394 }
5395 }
5396#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5397
5398 unsigned const idxReg = (fPreferVolatile
5399 ? ASMBitFirstSetU32(fRegs)
5400 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5401 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5402 - 1;
5403
5404 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5405 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5406 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5407 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5408
5409 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5410 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5411 uint32_t idxGstSimdReg = 0;
5412 do
5413 {
5414 if (fGstRegShadows & 0x1)
5415 {
5416 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5417 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5418 }
5419 idxGstSimdReg++;
5420 fGstRegShadows >>= 1;
5421 } while (fGstRegShadows);
5422
5423 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5424 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5425 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5426 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5427 return idxReg;
5428 }
5429
5430 /*
5431 * Try free up a variable that's in a register.
5432 *
5433 * saved on the stack, then in the second round moving things to the stack.
5434 * saved on the stack, then in the second round move things to the stack.
5435 */
5436 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5437 AssertReleaseFailed(); /** @todo No variable support right now. */
5438#if 0
5439 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5440 {
5441 uint32_t fVars = pReNative->Core.bmSimdVars;
5442 while (fVars)
5443 {
5444 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5445 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5446 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5447 && (RT_BIT_32(idxReg) & fRegMask)
5448 && ( iLoop == 0
5449 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5450 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5451 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5452 {
5453 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5454 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5455 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5456 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5457 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5458 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5459
5460 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5461 {
5462 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5463 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5464 }
5465
5466 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5467 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5468
5469 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5470 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5471 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5472 return idxReg;
5473 }
5474 fVars &= ~RT_BIT_32(idxVar);
5475 }
5476 }
5477#endif
5478
5479 AssertFailed();
5480 return UINT8_MAX;
5481}
5482
5483
5484/**
5485 * Flushes a set of guest register shadow copies.
5486 *
5487 * This is usually done after calling a threaded function or a C-implementation
5488 * of an instruction.
5489 *
5490 * @param pReNative The native recompile state.
5491 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5492 */
5493DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5494{
5495 /*
5496 * Reduce the mask by what's currently shadowed
5497 */
5498 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5499 fGstSimdRegs &= bmGstSimdRegShadows;
5500 if (fGstSimdRegs)
5501 {
5502 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5503 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5504 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5505 if (bmGstSimdRegShadowsNew)
5506 {
5507 /*
5508 * Partial.
5509 */
5510 do
5511 {
5512 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5513 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5514 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5515 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5516 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5517 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5518
5519 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5520 fGstSimdRegs &= ~fInThisHstReg;
5521 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5522 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5523 if (!fGstRegShadowsNew)
5524 {
5525 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5526 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5527 }
5528 } while (fGstSimdRegs != 0);
5529 }
5530 else
5531 {
5532 /*
5533 * Clear all.
5534 */
5535 do
5536 {
5537 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5538 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5539 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5540 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5541 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5542 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5543
5544 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5545 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5546 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5547 } while (fGstSimdRegs != 0);
5548 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5549 }
5550 }
5551}
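
/* Illustrative usage sketch (editorial addition, not part of the original source): the SIMD
   counterpart of iemNativeRegFlushGuestShadows() - drop the cached ymm0 shadow so it gets
   reloaded from CPUMCTX on the next use. */
#if 0
static void iemNativeExampleDropYmm0Shadow(PIEMRECOMPILERSTATE pReNative)
{
    iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0)));
}
#endif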
5552
5553
5554/**
5555 * Allocates a temporary host SIMD register.
5556 *
5557 * This may emit code to save register content onto the stack in order to free
5558 * up a register.
5559 *
5560 * @returns The host register number; throws VBox status code on failure,
5561 * so no need to check the return value.
5562 * @param pReNative The native recompile state.
5563 * @param poff Pointer to the variable with the code buffer position.
5564 * This will be update if we need to move a variable from
5565 * This will be updated if we need to move a variable from
5566 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5567 * registers (@c true, default) or the other way around
5568 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5569 */
5570DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5571{
5572 /*
5573 * Try find a completely unused register, preferably a call-volatile one.
5574 */
5575 uint8_t idxSimdReg;
5576 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5577 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5578 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5579 if (fRegs)
5580 {
5581 if (fPreferVolatile)
5582 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5583 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5584 else
5585 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5586 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5587 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5588 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5589 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5590 }
5591 else
5592 {
5593 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5594 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5595 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5596 }
5597
5598 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5599 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5600}
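
/* Illustrative usage sketch (editorial addition, not part of the original source): a scratch
   SIMD register follows the same alloc/use/free pattern as the GPR variant. */
#if 0
static uint32_t iemNativeExampleSimdScratch(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
    /* ... emit SIMD code using idxSimdRegTmp as a scratch register ... */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
    return off;
}
#endif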
5601
5602
5603/**
5604 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5605 * registers.
5606 *
5607 * @returns The host register number; throws VBox status code on failure,
5608 * so no need to check the return value.
5609 * @param pReNative The native recompile state.
5610 * @param poff Pointer to the variable with the code buffer position.
5611 * This will be updated if we need to move a variable from
5612 * register to stack in order to satisfy the request.
5613 * @param fRegMask Mask of acceptable registers.
5614 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5615 * registers (@c true, default) or the other way around
5616 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5617 */
5618DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5619 bool fPreferVolatile /*= true*/)
5620{
5621 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5622 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5623
5624 /*
5625 * Try find a completely unused register, preferably a call-volatile one.
5626 */
5627 uint8_t idxSimdReg;
5628 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5629 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5630 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5631 & fRegMask;
5632 if (fRegs)
5633 {
5634 if (fPreferVolatile)
5635 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5636 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5637 else
5638 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5639 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5640 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5641 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5642 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5643 }
5644 else
5645 {
5646 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5647 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5648 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5649 }
5650
5651 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5652 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5653}
5654
5655
5656/**
5657 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5658 *
5659 * @param pReNative The native recompile state.
5660 * @param idxHstSimdReg The host SIMD register to update the state for.
5661 * @param enmLoadSz The load size to set.
5662 */
5663DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5664 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5665{
5666 /* Everything valid already? -> nothing to do. */
5667 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5668 return;
5669
5670 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5671 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5672 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5673 {
5674 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5675 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5676 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5677 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5678 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5679 }
5680}
5681
5682
5683static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5684 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5685{
5686 /* Easy case first: either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5687 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5688 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5689 {
5690# ifdef RT_ARCH_ARM64
5691 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5692 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5693# endif
5694
5695 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5696 {
5697 switch (enmLoadSzDst)
5698 {
5699 case kIemNativeGstSimdRegLdStSz_256:
5700 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5701 break;
5702 case kIemNativeGstSimdRegLdStSz_Low128:
5703 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5704 break;
5705 case kIemNativeGstSimdRegLdStSz_High128:
5706 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5707 break;
5708 default:
5709 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5710 }
5711
5712 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5713 }
5714 }
5715 else
5716 {
5717 /* Complicated stuff where the source is currently missing something, later. */
5718 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5719 }
5720
5721 return off;
5722}
5723
5724
5725/**
5726 * Allocates a temporary host SIMD register for keeping a guest
5727 * SIMD register value.
5728 *
5729 * Since we may already have a register holding the guest register value,
5730 * code will be emitted to do the loading if that's not the case. Code may also
5731 * be emitted if we have to free up a register to satisfy the request.
5732 *
5733 * @returns The host register number; throws VBox status code on failure, so no
5734 * need to check the return value.
5735 * @param pReNative The native recompile state.
5736 * @param poff Pointer to the variable with the code buffer
5737 * position. This will be updated if we need to move a
5738 * variable from register to stack in order to satisfy
5739 * the request.
5740 * @param enmGstSimdReg The guest SIMD register that is to be updated.
 * @param enmLoadSz Which parts (low/high 128 bits or all 256 bits) of the register must be valid.
5741 * @param enmIntendedUse How the caller will be using the host register.
5742 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5743 * register is okay (default). The ASSUMPTION here is
5744 * that the caller has already flushed all volatile
5745 * registers, so this is only applied if we allocate a
5746 * new register.
5747 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5748 */
5749DECL_HIDDEN_THROW(uint8_t)
5750iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5751 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5752 bool fNoVolatileRegs /*= false*/)
5753{
5754 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5755#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5756 AssertMsg( pReNative->idxCurCall == 0
5757 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5758 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5759 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5760 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5761 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5762 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5763#endif
5764#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5765 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5766#endif
5767 uint32_t const fRegMask = !fNoVolatileRegs
5768 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5769 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5770
5771 /*
5772 * First check if the guest register value is already in a host register.
5773 */
5774 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5775 {
5776 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5777 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5778 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5779 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5780
5781 /* It's not supposed to be allocated... */
5782 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5783 {
5784 /*
5785 * If the register will trash the guest shadow copy, try find a
5786 * completely unused register we can use instead. If that fails,
5787 * we need to disassociate the host reg from the guest reg.
5788 */
5789 /** @todo would be nice to know if preserving the register is in any way helpful. */
5790 /* If the purpose is calculations, try duplicating the register value as
5791 we'll be clobbering the shadow. */
5792 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5793 && ( ~pReNative->Core.bmHstSimdRegs
5794 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5795 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5796 {
5797 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5798
5799 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5800
5801 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5802 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5803 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5804 idxSimdReg = idxRegNew;
5805 }
5806 /* If the current register matches the restrictions, go ahead and allocate
5807 it for the caller. */
5808 else if (fRegMask & RT_BIT_32(idxSimdReg))
5809 {
5810 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5811 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5812 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5813 {
5814 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5815 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5816 else
5817 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5818 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5819 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5820 }
5821 else
5822 {
5823 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5824 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5825 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5826 }
5827 }
5828 /* Otherwise, allocate a register that satisfies the caller and transfer
5829 the shadowing if compatible with the intended use. (This basically
5830 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5831 else
5832 {
5833 Assert(fNoVolatileRegs);
5834 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5835 !fNoVolatileRegs
5836 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5837 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5838 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5839 {
5840 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5841 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5842 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5843 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5844 }
5845 else
5846 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5847 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5848 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5849 idxSimdReg = idxRegNew;
5850 }
5851 }
5852 else
5853 {
5854 /*
5855 * Oops. Shadowed guest register already allocated!
5856 *
5857 * Allocate a new register, copy the value and, if updating, the
5858 * guest shadow copy assignment to the new register.
5859 */
5860 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5861 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5862 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5863 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5864
5865 /** @todo share register for readonly access. */
5866 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5867 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5868
5869 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5870 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5871 else
5872 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5873
5874 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5875 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5876 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5877 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5878 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5879 else
5880 {
5881 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5882 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5883 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5884 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5885 }
5886 idxSimdReg = idxRegNew;
5887 }
5888 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5889
5890#ifdef VBOX_STRICT
5891 /* Strict builds: Check that the value is correct. */
5892 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5893 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5894#endif
5895
5896 return idxSimdReg;
5897 }
5898
5899 /*
5900 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5901 */
5902 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5903
5904 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5905 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5906 else
5907 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5908
5909 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5910 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5911
5912 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5913 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5914
5915 return idxRegNew;
5916}
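
/*
 * Rough usage sketch (illustration only, not lifted from an actual MC block emitter):
 *
 *      // Grab a host register for guest SIMD register no. 1, making sure both halves are
 *      // loaded since only parts of it will be modified:
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                         IEMNATIVEGSTSIMDREG_SIMD(1),
 *                                                                         kIemNativeGstSimdRegLdStSz_256,
 *                                                                         kIemNativeGstRegUse_ForUpdate);
 *      // ... emit the actual SIMD operation on idxSimdReg ...
 *      // Release the temporary again; the free routine is assumed to be the SIMD counterpart
 *      // of iemNativeRegFreeTmp:
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
 */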
5917
5918#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5919
5920
5921
5922/*********************************************************************************************************************************
5923* Code emitters for flushing pending guest register writes and sanity checks *
5924*********************************************************************************************************************************/
5925
5926#ifdef VBOX_STRICT
5927/**
5928 * Does internal register allocator sanity checks.
5929 */
5930DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5931{
5932 /*
5933 * Iterate host registers building a guest shadowing set.
5934 */
5935 uint64_t bmGstRegShadows = 0;
5936 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5937 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5938 while (bmHstRegsWithGstShadow)
5939 {
5940 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5941 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5942 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5943
5944 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5945 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5946 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5947 bmGstRegShadows |= fThisGstRegShadows;
5948 while (fThisGstRegShadows)
5949 {
5950 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5951 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5952 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5953 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5954 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5955 }
5956 }
5957 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5958 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5959 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5960
5961 /*
5962 * Now the other way around, checking the guest to host index array.
5963 */
5964 bmHstRegsWithGstShadow = 0;
5965 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5966 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5967 while (bmGstRegShadows)
5968 {
5969 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5970 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5971 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5972
5973 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5974 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5975 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5976 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5977 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5978 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5979 }
5980 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5981 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5982 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5983}
5984#endif /* VBOX_STRICT */
5985
5986
5987/**
5988 * Flushes any delayed guest register writes.
5989 *
5990 * This must be called prior to calling CImpl functions and any helpers that use
5991 * the guest state (like raising exceptions) and such.
5992 *
5993 * Currently only RIP updates are done in a delayed fashion (when
5994 * IEMNATIVE_WITH_DELAYED_PC_UPDATING is defined); with the SIMD register allocator, dirty guest SIMD registers are flushed here as well.
5995 */
5996DECL_HIDDEN_THROW(uint32_t)
5997iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5998{
5999#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6000 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
6001 off = iemNativeEmitPcWriteback(pReNative, off);
6002#else
6003 RT_NOREF(pReNative, fGstShwExcept);
6004#endif
6005
6006#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6007 /** @todo r=bird: There must be a quicker way to check if anything needs
6008 * doing and then call simd function to do the flushing */
6009 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6010 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6011 {
6012 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6013 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6014
6015 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6016 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
6017
6018 if ( fFlushShadows
6019 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6020 {
6021 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6022
6023 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6024 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6025 }
6026 }
6027#else
6028 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6029#endif
6030
6031 return off;
6032}
6033
6034
6035#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6036/**
6037 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
6038 */
6039DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6040{
6041 Assert(pReNative->Core.offPc);
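    /* Example (illustrative): if the previous three instructions were 2, 3 and 5 bytes long and
       all had their RIP update skipped, offPc is 10 and cInstrPcUpdateSkipped is 3; a single
       add of 10 to the PC shadow register plus one store to CPUMCTX::rip is emitted below
       instead of three separate updates. */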
6042# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6043 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6044 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
6045# endif
6046
6047# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6048 /* Allocate a temporary PC register. */
6049 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6050
6051 /* Perform the addition and store the result. */
6052 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6053 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6054
6055 /* Free but don't flush the PC register. */
6056 iemNativeRegFreeTmp(pReNative, idxPcReg);
6057# else
6058 /* Compare the shadow with the context value, they should match. */
6059 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6060 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6061# endif
6062
6063 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
6064 pReNative->Core.offPc = 0;
6065 pReNative->Core.cInstrPcUpdateSkipped = 0;
6066
6067 return off;
6068}
6069#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6070
6071
6072/*********************************************************************************************************************************
6073* Code Emitters (larger snippets) *
6074*********************************************************************************************************************************/
6075
6076/**
6077 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6078 * extending to 64-bit width.
6079 *
6080 * @returns New code buffer offset on success, UINT32_MAX on failure.
6081 * @param pReNative The native recompile state.
6082 * @param off The current code buffer position.
6083 * @param idxHstReg The host register to load the guest register value into.
6084 * @param enmGstReg The guest register to load.
6085 *
6086 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6087 * that is something the caller needs to do if applicable.
6088 */
6089DECL_HIDDEN_THROW(uint32_t)
6090iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6091{
6092 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6093 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6094
6095 switch (g_aGstShadowInfo[enmGstReg].cb)
6096 {
6097 case sizeof(uint64_t):
6098 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6099 case sizeof(uint32_t):
6100 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6101 case sizeof(uint16_t):
6102 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6103#if 0 /* not present in the table. */
6104 case sizeof(uint8_t):
6105 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6106#endif
6107 default:
6108 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6109 }
6110}
6111
6112
6113#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6114/**
6115 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6116 *
6117 * @returns New code buffer offset on success, UINT32_MAX on failure.
6118 * @param pReNative The recompiler state.
6119 * @param off The current code buffer position.
6120 * @param idxHstSimdReg The host register to load the guest register value into.
6121 * @param enmGstSimdReg The guest register to load.
6122 * @param enmLoadSz The load size of the register.
6123 *
6124 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6125 * that is something the caller needs to do if applicable.
6126 */
6127DECL_HIDDEN_THROW(uint32_t)
6128iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6129 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6130{
6131 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6132
6133 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6134 switch (enmLoadSz)
6135 {
6136 case kIemNativeGstSimdRegLdStSz_256:
6137 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6138 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6139 case kIemNativeGstSimdRegLdStSz_Low128:
6140 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6141 case kIemNativeGstSimdRegLdStSz_High128:
6142 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6143 default:
6144 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6145 }
6146}
6147#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6148
6149#ifdef VBOX_STRICT
6150
6151/**
6152 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6153 *
6154 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6155 * Trashes EFLAGS on AMD64.
6156 */
6157DECL_HIDDEN_THROW(uint32_t)
6158iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6159{
6160# ifdef RT_ARCH_AMD64
6161 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6162
6163 /* rol reg64, 32 */
6164 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6165 pbCodeBuf[off++] = 0xc1;
6166 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6167 pbCodeBuf[off++] = 32;
6168
6169 /* test reg32, ffffffffh */
6170 if (idxReg >= 8)
6171 pbCodeBuf[off++] = X86_OP_REX_B;
6172 pbCodeBuf[off++] = 0xf7;
6173 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6174 pbCodeBuf[off++] = 0xff;
6175 pbCodeBuf[off++] = 0xff;
6176 pbCodeBuf[off++] = 0xff;
6177 pbCodeBuf[off++] = 0xff;
6178
6179 /* je/jz +1 */
6180 pbCodeBuf[off++] = 0x74;
6181 pbCodeBuf[off++] = 0x01;
6182
6183 /* int3 */
6184 pbCodeBuf[off++] = 0xcc;
6185
6186 /* rol reg64, 32 */
6187 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6188 pbCodeBuf[off++] = 0xc1;
6189 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6190 pbCodeBuf[off++] = 32;
6191
6192# elif defined(RT_ARCH_ARM64)
6193 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6194 /* lsr tmp0, reg64, #32 */
6195 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6196 /* cbz tmp0, +1 */
6197 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6198 /* brk #0x1100 */
6199 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6200
6201# else
6202# error "Port me!"
6203# endif
6204 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6205 return off;
6206}
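
/*
 * For reference, the AMD64 sequence above for e.g. idxReg == X86_GREG_xBX assembles to
 * (reconstructed by hand from the byte emits, documentation only):
 *
 *      48 c1 c3 20             rol rbx, 32
 *      f7 c3 ff ff ff ff       test ebx, 0ffffffffh
 *      74 01                   je +1
 *      cc                      int3
 *      48 c1 c3 20             rol rbx, 32
 */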
6207
6208
6209/**
6210 * Emitting code that checks that the content of register @a idxReg is the same
6211 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6212 * instruction if that's not the case.
6213 *
6214 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6215 * Trashes EFLAGS on AMD64.
6216 */
6217DECL_HIDDEN_THROW(uint32_t)
6218iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6219{
6220# ifdef RT_ARCH_AMD64
6221 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6222
6223 /* cmp reg, [mem] */
6224 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6225 {
6226 if (idxReg >= 8)
6227 pbCodeBuf[off++] = X86_OP_REX_R;
6228 pbCodeBuf[off++] = 0x38;
6229 }
6230 else
6231 {
6232 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6233 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6234 else
6235 {
6236 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6237 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6238 else
6239 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6240 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6241 if (idxReg >= 8)
6242 pbCodeBuf[off++] = X86_OP_REX_R;
6243 }
6244 pbCodeBuf[off++] = 0x39;
6245 }
6246 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6247
6248 /* je/jz +1 */
6249 pbCodeBuf[off++] = 0x74;
6250 pbCodeBuf[off++] = 0x01;
6251
6252 /* int3 */
6253 pbCodeBuf[off++] = 0xcc;
6254
6255 /* For values smaller than the register size, we must check that the rest
6256 of the register is all zeros. */
6257 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6258 {
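        /* The immediate built below is 0xffffff00 for 8-bit guest registers and 0xffff0000 for
           16-bit ones (sign-extended to 64 bits by the REX.W form), i.e. it checks that every
           bit above the guest value is zero. */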
6259 /* test reg64, imm32 */
6260 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6261 pbCodeBuf[off++] = 0xf7;
6262 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6263 pbCodeBuf[off++] = 0;
6264 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6265 pbCodeBuf[off++] = 0xff;
6266 pbCodeBuf[off++] = 0xff;
6267
6268 /* je/jz +1 */
6269 pbCodeBuf[off++] = 0x74;
6270 pbCodeBuf[off++] = 0x01;
6271
6272 /* int3 */
6273 pbCodeBuf[off++] = 0xcc;
6274 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6275 }
6276 else
6277 {
6278 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6279 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6280 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6281 }
6282
6283# elif defined(RT_ARCH_ARM64)
6284 /* mov TMP0, [gstreg] */
6285 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6286
6287 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6288 /* sub tmp0, tmp0, idxReg */
6289 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6290 /* cbz tmp0, +1 */
6291 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6292 /* brk #0x1000+enmGstReg */
6293 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6294 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6295
6296# else
6297# error "Port me!"
6298# endif
6299 return off;
6300}
6301
6302
6303# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6304# ifdef RT_ARCH_AMD64
6305/**
6306 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6307 */
6308DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6309{
6310 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6311 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6312 if (idxSimdReg >= 8)
6313 pbCodeBuf[off++] = X86_OP_REX_R;
6314 pbCodeBuf[off++] = 0x0f;
6315 pbCodeBuf[off++] = 0x38;
6316 pbCodeBuf[off++] = 0x29;
6317 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6318
6319 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6320 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6321 pbCodeBuf[off++] = X86_OP_REX_W
6322 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6323 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6324 pbCodeBuf[off++] = 0x0f;
6325 pbCodeBuf[off++] = 0x3a;
6326 pbCodeBuf[off++] = 0x16;
6327 pbCodeBuf[off++] = 0xeb;
6328 pbCodeBuf[off++] = 0x00;
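    /* Note: the hard-coded ModRM byte 0xeb (here and in the second pextrq below) encodes
       reg=5, rm=3, so this appears to rely on (idxSimdReg & 7) == 5, i.e. the fixed SIMD
       temporary register, and (IEMNATIVE_REG_FIXED_TMP0 & 7) == 3. */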
6329
6330 /* cmp tmp0, 0xffffffffffffffff. */
6331 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6332 pbCodeBuf[off++] = 0x83;
6333 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6334 pbCodeBuf[off++] = 0xff;
6335
6336 /* je/jz +1 */
6337 pbCodeBuf[off++] = 0x74;
6338 pbCodeBuf[off++] = 0x01;
6339
6340 /* int3 */
6341 pbCodeBuf[off++] = 0xcc;
6342
6343 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6344 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6345 pbCodeBuf[off++] = X86_OP_REX_W
6346 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6347 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6348 pbCodeBuf[off++] = 0x0f;
6349 pbCodeBuf[off++] = 0x3a;
6350 pbCodeBuf[off++] = 0x16;
6351 pbCodeBuf[off++] = 0xeb;
6352 pbCodeBuf[off++] = 0x01;
6353
6354 /* cmp tmp0, 0xffffffffffffffff. */
6355 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6356 pbCodeBuf[off++] = 0x83;
6357 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6358 pbCodeBuf[off++] = 0xff;
6359
6360 /* je/jz +1 */
6361 pbCodeBuf[off++] = 0x74;
6362 pbCodeBuf[off++] = 0x01;
6363
6364 /* int3 */
6365 pbCodeBuf[off++] = 0xcc;
6366
6367 return off;
6368}
6369# endif
6370
6371
6372/**
6373 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6374 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6375 * instruction if that's not the case.
6376 *
6377 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6378 * Trashes EFLAGS on AMD64.
6379 */
6380DECL_HIDDEN_THROW(uint32_t)
6381iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6382 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6383{
6384 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6385 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6386 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6387 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6388 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6389 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6390 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6391 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6392 return off;
6393
6394# ifdef RT_ARCH_AMD64
6395 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6396 {
6397 /* movdqa vectmp0, idxSimdReg */
6398 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6399
6400 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6401
6402 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6403 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6404 }
6405
6406 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6407 {
6408 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6409 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6410
6411 /* vextracti128 vectmp0, idxSimdReg, 1 */
6412 pbCodeBuf[off++] = X86_OP_VEX3;
6413 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6414 | X86_OP_VEX3_BYTE1_X
6415 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6416 | 0x03; /* Opcode map */
6417 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6418 pbCodeBuf[off++] = 0x39;
6419 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6420 pbCodeBuf[off++] = 0x01;
6421
6422 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6423 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6424 }
6425# elif defined(RT_ARCH_ARM64)
6426 /* mov vectmp0, [gstreg] */
6427 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6428
6429 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6430 {
6431 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6432 /* eor vectmp0, vectmp0, idxSimdReg */
6433 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6434 /* cnt vectmp0, vectmp0, #0*/
6435 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6436 /* umov tmp0, vectmp0.D[0] */
6437 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6438 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6439 /* cbz tmp0, +1 */
6440 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6441 /* brk #0x1000+enmGstReg */
6442 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6443 }
6444
6445 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6446 {
6447 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6448 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6449 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6450 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6451 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6452 /* umov tmp0, (vectmp0 + 1).D[0] */
6453 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6454 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6455 /* cbz tmp0, +1 */
6456 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6457 /* brk #0x1000+enmGstReg */
6458 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6459 }
6460
6461# else
6462# error "Port me!"
6463# endif
6464
6465 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6466 return off;
6467}
6468# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6469
6470
6471/**
6472 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6473 * important bits.
6474 *
6475 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6476 * Trashes EFLAGS on AMD64.
6477 */
6478DECL_HIDDEN_THROW(uint32_t)
6479iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6480{
6481 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6482 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6483 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6484 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6485
6486#ifdef RT_ARCH_AMD64
6487 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6488
6489 /* je/jz +1 */
6490 pbCodeBuf[off++] = 0x74;
6491 pbCodeBuf[off++] = 0x01;
6492
6493 /* int3 */
6494 pbCodeBuf[off++] = 0xcc;
6495
6496# elif defined(RT_ARCH_ARM64)
6497 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6498
6499 /* b.eq +1 */
6500 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6501 /* brk #0x2000 */
6502 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6503
6504# else
6505# error "Port me!"
6506# endif
6507 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6508
6509 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6510 return off;
6511}
6512
6513#endif /* VBOX_STRICT */
6514
6515
6516#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6517/**
6518 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6519 */
6520DECL_HIDDEN_THROW(uint32_t)
6521iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6522{
6523 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6524
6525 fEflNeeded &= X86_EFL_STATUS_BITS;
6526 if (fEflNeeded)
6527 {
6528# ifdef RT_ARCH_AMD64
6529 /* test dword [pVCpu + offVCpu], imm32 */
6530 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6531 if (fEflNeeded <= 0xff)
6532 {
6533 pCodeBuf[off++] = 0xf6;
6534 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6535 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6536 }
6537 else
6538 {
6539 pCodeBuf[off++] = 0xf7;
6540 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6541 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6542 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6543 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6544 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6545 }
6546 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6547
6548# else
6549 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6550 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6551 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6552# ifdef RT_ARCH_ARM64
6553 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6554 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6555# else
6556# error "Port me!"
6557# endif
6558 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6559# endif
6560 }
6561 return off;
6562}
6563#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6564
6565
6566/**
6567 * Emits a code for checking the return code of a call and rcPassUp, returning
6568 * from the code if either are non-zero.
6569 */
6570DECL_HIDDEN_THROW(uint32_t)
6571iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6572{
6573#ifdef RT_ARCH_AMD64
6574 /*
6575 * AMD64: eax = call status code.
6576 */
6577
6578 /* edx = rcPassUp */
6579 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6580# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6581 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6582# endif
6583
6584 /* edx = eax | rcPassUp */
6585 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6586 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6587 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6588 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6589
6590 /* Jump to non-zero status return path. */
6591 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6592
6593 /* done. */
6594
6595#elif RT_ARCH_ARM64
6596 /*
6597 * ARM64: w0 = call status code.
6598 */
6599# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6600 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6601# endif
6602 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6603
6604 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6605
6606 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6607
6608 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6609 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6610 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6611
6612#else
6613# error "port me"
6614#endif
6615 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6616 RT_NOREF_PV(idxInstr);
6617 return off;
6618}
6619
6620
6621/**
6622 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6623 * raising a \#GP(0) if it isn't.
6624 *
6625 * @returns New code buffer offset, UINT32_MAX on failure.
6626 * @param pReNative The native recompile state.
6627 * @param off The code buffer offset.
6628 * @param idxAddrReg The host register with the address to check.
6629 * @param idxInstr The current instruction.
6630 */
6631DECL_HIDDEN_THROW(uint32_t)
6632iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6633{
6634 /*
6635 * Make sure we don't have any outstanding guest register writes as we may
6636 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6637 */
6638 off = iemNativeRegFlushPendingWrites(pReNative, off);
6639
6640#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6641 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6642#else
6643 RT_NOREF(idxInstr);
6644#endif
6645
6646#ifdef RT_ARCH_AMD64
6647 /*
6648 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6649 * return raisexcpt();
6650 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6651 */
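    /* Worked example (illustrative): for the non-canonical address 0x0000800000000000 the high
       dword is 0x00008000; adding 0x8000 gives 0x00010000 and shifting right by 16 leaves 1, so
       the #GP(0) path is taken. For any canonical address the high dword is either <= 0x00007fff
       or >= 0xffff8000, so the 32-bit add followed by the shift always yields zero. */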
6652 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6653
6654 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6655 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6656 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6657 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6658 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6659
6660 iemNativeRegFreeTmp(pReNative, iTmpReg);
6661
6662#elif defined(RT_ARCH_ARM64)
6663 /*
6664 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6665 * return raisexcpt();
6666 * ----
6667 * mov x1, 0x800000000000
6668 * add x1, x0, x1
6669 * cmp xzr, x1, lsr 48
6670 * b.ne .Lraisexcpt
6671 */
6672 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6673
6674 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6675 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6676 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6677 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6678
6679 iemNativeRegFreeTmp(pReNative, iTmpReg);
6680
6681#else
6682# error "Port me"
6683#endif
6684 return off;
6685}
6686
6687
6688/**
6689 * Emits code to check that the content of @a idxAddrReg is within the limit
6690 * of CS, raising a \#GP(0) if it isn't.
6691 *
6692 * @returns New code buffer offset; throws VBox status code on error.
6693 * @param pReNative The native recompile state.
6694 * @param off The code buffer offset.
6695 * @param idxAddrReg The host register (32-bit) with the address to
6696 * check.
6697 * @param idxInstr The current instruction.
6698 */
6699DECL_HIDDEN_THROW(uint32_t)
6700iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6701 uint8_t idxAddrReg, uint8_t idxInstr)
6702{
6703 /*
6704 * Make sure we don't have any outstanding guest register writes as we may
6705 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6706 */
6707 off = iemNativeRegFlushPendingWrites(pReNative, off);
6708
6709#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6710 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6711#else
6712 RT_NOREF(idxInstr);
6713#endif
6714
6715 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6716 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6717 kIemNativeGstRegUse_ReadOnly);
6718
6719 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6720 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6721
6722 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6723 return off;
6724}
6725
6726
6727/**
6728 * Emits a call to a CImpl function or something similar.
6729 */
6730DECL_HIDDEN_THROW(uint32_t)
6731iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6732 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6733{
6734 /* Writeback everything. */
6735 off = iemNativeRegFlushPendingWrites(pReNative, off);
6736
6737 /*
6738 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6739 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6740 */
6741 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6742 fGstShwFlush
6743 | RT_BIT_64(kIemNativeGstReg_Pc)
6744 | RT_BIT_64(kIemNativeGstReg_EFlags));
6745 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6746
6747 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6748
6749 /*
6750 * Load the parameters.
6751 */
6752#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6753 /* Special handling for the hidden VBOXSTRICTRC pointer. */
6754 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6755 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6756 if (cAddParams > 0)
6757 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6758 if (cAddParams > 1)
6759 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6760 if (cAddParams > 2)
6761 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6762 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6763
6764#else
6765 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6766 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6767 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6768 if (cAddParams > 0)
6769 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6770 if (cAddParams > 1)
6771 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6772 if (cAddParams > 2)
6773# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6774 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6775# else
6776 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6777# endif
6778#endif
6779
6780 /*
6781 * Make the call.
6782 */
6783 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6784
6785#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6786 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6787#endif
6788
6789 /*
6790 * Check the status code.
6791 */
6792 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6793}
6794
6795
6796/**
6797 * Emits a call to a threaded worker function.
6798 */
6799DECL_HIDDEN_THROW(uint32_t)
6800iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6801{
6802 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6803
6804 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6805 off = iemNativeRegFlushPendingWrites(pReNative, off);
6806
6807 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6808 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6809
6810#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6811 /* The threaded function may throw / long jmp, so set current instruction
6812 number if we're counting. */
6813 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6814#endif
6815
6816 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6817
6818#ifdef RT_ARCH_AMD64
6819 /* Load the parameters and emit the call. */
6820# ifdef RT_OS_WINDOWS
6821# ifndef VBOXSTRICTRC_STRICT_ENABLED
6822 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6823 if (cParams > 0)
6824 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6825 if (cParams > 1)
6826 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6827 if (cParams > 2)
6828 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6829# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6830 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6831 if (cParams > 0)
6832 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6833 if (cParams > 1)
6834 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6835 if (cParams > 2)
6836 {
6837 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6838 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6839 }
6840 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6841# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6842# else
6843 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6844 if (cParams > 0)
6845 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6846 if (cParams > 1)
6847 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6848 if (cParams > 2)
6849 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6850# endif
6851
6852 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6853
6854# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6855 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6856# endif
6857
6858#elif RT_ARCH_ARM64
6859 /*
6860 * ARM64:
6861 */
6862 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6863 if (cParams > 0)
6864 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6865 if (cParams > 1)
6866 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6867 if (cParams > 2)
6868 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6869
6870 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6871
6872#else
6873# error "port me"
6874#endif
6875
6876 /*
6877 * Check the status code.
6878 */
6879 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6880
6881 return off;
6882}
6883
6884#ifdef VBOX_WITH_STATISTICS
6885/**
6886 * Emits code to update the thread call statistics.
6887 */
6888DECL_INLINE_THROW(uint32_t)
6889iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6890{
6891 /*
6892 * Update threaded function stats.
6893 */
6894 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6895 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6896# if defined(RT_ARCH_ARM64)
6897 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6898 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6899 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6900 iemNativeRegFreeTmp(pReNative, idxTmp1);
6901 iemNativeRegFreeTmp(pReNative, idxTmp2);
6902# else
6903 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6904# endif
6905 return off;
6906}
6907#endif /* VBOX_WITH_STATISTICS */
6908
6909
6910/**
6911 * Emits the code at the ReturnWithFlags label (returns
6912 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6913 */
6914static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6915{
6916 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6917 if (idxLabel != UINT32_MAX)
6918 {
6919 iemNativeLabelDefine(pReNative, idxLabel, off);
6920
6921 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6922
6923 /* jump back to the return sequence. */
6924 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6925 }
6926 return off;
6927}
6928
6929
6930/**
6931 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6932 */
6933static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6934{
6935 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6936 if (idxLabel != UINT32_MAX)
6937 {
6938 iemNativeLabelDefine(pReNative, idxLabel, off);
6939
6940 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6941
6942 /* jump back to the return sequence. */
6943 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6944 }
6945 return off;
6946}
6947
6948
6949/**
6950 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6951 */
6952static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6953{
6954 /*
6955 * Generate the rc + rcPassUp fiddling code if needed.
6956 */
6957 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6958 if (idxLabel != UINT32_MAX)
6959 {
6960 iemNativeLabelDefine(pReNative, idxLabel, off);
6961
6962 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6963#ifdef RT_ARCH_AMD64
6964# ifdef RT_OS_WINDOWS
6965# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6966 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6967# endif
6968 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6969 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6970# else
6971 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6972 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6973# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6974 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6975# endif
6976# endif
6977# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6978 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6979# endif
6980
6981#else
6982 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6983 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6984 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6985#endif
6986
6987 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6988 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6989 }
6990 return off;
6991}
6992
6993
6994/**
6995 * Emits a standard epilog.
6996 */
6997static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6998{
6999 *pidxReturnLabel = UINT32_MAX;
7000
7001 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7002 off = iemNativeRegFlushPendingWrites(pReNative, off);
7003
7004 /*
7005 * Successful return, so clear the return register (eax, w0).
7006 */
7007 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
7008
7009 /*
7010 * Define label for common return point.
7011 */
7012 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7013 *pidxReturnLabel = idxReturn;
7014
7015 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7016
7017 /*
7018 * Restore registers and return.
7019 */
7020#ifdef RT_ARCH_AMD64
7021 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7022
7023 /* Reposition esp at the r15 restore point. */
7024 pbCodeBuf[off++] = X86_OP_REX_W;
7025 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7026 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7027 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7028
7029 /* Pop non-volatile registers and return */
7030 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7031 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7032 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7033 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7034 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7035 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7036 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7037 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7038# ifdef RT_OS_WINDOWS
7039 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7040 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7041# endif
7042 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7043 pbCodeBuf[off++] = 0xc9; /* leave */
7044 pbCodeBuf[off++] = 0xc3; /* ret */
7045 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7046
7047#elif RT_ARCH_ARM64
7048 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7049
7050 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7051 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7052 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7053 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7054 IEMNATIVE_FRAME_VAR_SIZE / 8);
7055 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7056 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7057 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7058 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7059 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7060 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7061 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7062 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7063 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7064 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7065 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7066 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7067
7068 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7069 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7070 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7071 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7072
7073 /* retab / ret */
7074# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7075 if (1)
7076 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7077 else
7078# endif
7079 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7080
7081#else
7082# error "port me"
7083#endif
7084 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7085
7086 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7087}
7088
7089
7090/**
7091 * Emits a standard prolog.
7092 */
7093static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7094{
7095#ifdef RT_ARCH_AMD64
7096 /*
7097 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7098 * reserving 64 bytes for stack variables plus 4 non-register argument
7099 * slots. Fixed register assignment: xBX = pVCpu.
7100 *
7101 * Since we always do the same register spilling, we can use the same
7102 * unwind description for all the code.
7103 */
7104 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7105 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7106 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7107 pbCodeBuf[off++] = 0x8b;
7108 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7109 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7110 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7111# ifdef RT_OS_WINDOWS
7112 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7113 pbCodeBuf[off++] = 0x8b;
7114 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7115 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7116 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7117# else
7118 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7119 pbCodeBuf[off++] = 0x8b;
7120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7121# endif
7122 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7123 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7124 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7125 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7126 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7127 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7128 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7129 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7130
7131# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7132 /* Save the frame pointer. */
7133 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7134# endif
7135
7136 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7137 X86_GREG_xSP,
7138 IEMNATIVE_FRAME_ALIGN_SIZE
7139 + IEMNATIVE_FRAME_VAR_SIZE
7140 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7141 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7142 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7143 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7144 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7145
7146#elif RT_ARCH_ARM64
7147 /*
7148 * We set up a stack frame exactly like on x86, only we have to push the
7149 * return address ourselves here. We save all non-volatile registers.
7150 */
7151 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7152
7153 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have
7154 * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7155 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
7156 * in any way conditional, so we just emit this instruction now and hope for the best... */
7157 /* pacibsp */
7158 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7159# endif
7160
7161 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7162 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7163 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7164 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7165 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7166 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7167 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7168 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7169 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7170 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7171 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7172 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7173 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7174 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7175 /* Save the BP and LR (ret address) registers at the top of the frame. */
7176 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7177 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7178 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7179 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7181 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7182
7183 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7184 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7185
7186 /* mov r28, r0 */
7187 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7188 /* mov r27, r1 */
7189 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7190
7191# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7192 /* Save the frame pointer. */
7193 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7194 ARMV8_A64_REG_X2);
7195# endif
7196
7197#else
7198# error "port me"
7199#endif
7200 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7201 return off;
7202}
7203
7204
7205/*********************************************************************************************************************************
7206* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7207*********************************************************************************************************************************/
7208
7209/**
7210 * Internal work that allocates a variable with kind set to
7211 * kIemNativeVarKind_Invalid and no current stack allocation.
7212 *
7213 * The kind will either be set by the caller or later when the variable is first
7214 * assigned a value.
7215 *
7216 * @returns Unpacked index.
7217 * @internal
7218 */
7219static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7220{
7221 Assert(cbType > 0 && cbType <= 64);
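 /* The allocation bitmap has one set bit per live variable, so the lowest clear
 bit (found by inverting the bitmap and taking the first set bit) is the
 first free variable slot. */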
7222 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7223 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7224 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7225 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7226 pReNative->Core.aVars[idxVar].cbVar = cbType;
7227 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7228 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7229 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7230 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7231 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7232 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7233 pReNative->Core.aVars[idxVar].u.uValue = 0;
7234#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7235 pReNative->Core.aVars[idxVar].fSimdReg = false;
7236#endif
7237 return idxVar;
7238}
7239
7240
7241/**
7242 * Internal work that allocates an argument variable w/o setting enmKind.
7243 *
7244 * @returns Unpacked index.
7245 * @internal
7246 */
7247static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7248{
7249 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7250 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7251 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7252
7253 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7254 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7255 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7256 return idxVar;
7257}
7258
7259
7260/**
7261 * Gets the stack slot for a stack variable, allocating one if necessary.
7262 *
7263 * Calling this function implies that the stack slot will contain a valid
7264 * variable value. The caller deals with any register currently assigned to the
7265 * variable, typically by spilling it into the stack slot.
7266 *
7267 * @returns The stack slot number.
7268 * @param pReNative The recompiler state.
7269 * @param idxVar The variable.
7270 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7271 */
7272DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7273{
7274 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7275 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7276 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7277
7278 /* Already got a slot? */
7279 uint8_t const idxStackSlot = pVar->idxStackSlot;
7280 if (idxStackSlot != UINT8_MAX)
7281 {
7282 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7283 return idxStackSlot;
7284 }
7285
7286 /*
7287 * A single slot is easy to allocate.
7288 * Allocate them from the top end, closest to BP, to reduce the displacement.
7289 */
7290 if (pVar->cbVar <= sizeof(uint64_t))
7291 {
7292 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7293 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7294 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7295 pVar->idxStackSlot = (uint8_t)iSlot;
7296 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7297 return (uint8_t)iSlot;
7298 }
7299
7300 /*
7301 * We need more than one stack slot.
7302 *
7303 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7304 */
7305 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7306 Assert(pVar->cbVar <= 64);
7307 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7308 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
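 /* E.g. for cbVar=32 this gives fBitAlignMask=3 and fBitAllocMask=0xf, i.e. we
 search for four consecutive free slots starting at a 4-slot aligned index. */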
7309 uint32_t bmStack = pReNative->Core.bmStack;
7310 while (bmStack != UINT32_MAX)
7311 {
7312 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7313 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7314 iSlot = (iSlot - 1) & ~fBitAlignMask;
7315 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7316 {
7317 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7318 pVar->idxStackSlot = (uint8_t)iSlot;
7319 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7320 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7321 return (uint8_t)iSlot;
7322 }
7323
7324 bmStack |= (fBitAllocMask << iSlot);
7325 }
7326 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7327}
7328
7329
7330/**
7331 * Changes the variable to a stack variable.
7332 *
7333 * Currently this is only possible to do the first time the variable is used;
7334 * switching later can be implemented but is not done.
7335 *
7336 * @param pReNative The recompiler state.
7337 * @param idxVar The variable.
7338 * @throws VERR_IEM_VAR_IPE_2
7339 */
7340DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7341{
7342 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7343 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7344 if (pVar->enmKind != kIemNativeVarKind_Stack)
7345 {
7346 /* We could in theory transition from immediate to stack as well, but it
7347 would involve the caller doing work storing the value on the stack. So,
7348 till that's required we only allow transition from invalid. */
7349 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7350 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7351 pVar->enmKind = kIemNativeVarKind_Stack;
7352
7353 /* Note! We don't allocate a stack slot here, that's only done when a
7354 slot is actually needed to hold a variable value. */
7355 }
7356}
7357
7358
7359/**
7360 * Sets the variable to a constant (immediate) value.
7361 *
7362 * This does not require stack storage as we know the value and can always
7363 * reload it, unless of course it's referenced.
7364 *
7365 * @param pReNative The recompiler state.
7366 * @param idxVar The variable.
7367 * @param uValue The immediate value.
7368 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7369 */
7370DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7371{
7372 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7373 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7374 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7375 {
7376 /* Only simple transitions for now. */
7377 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7378 pVar->enmKind = kIemNativeVarKind_Immediate;
7379 }
7380 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7381
7382 pVar->u.uValue = uValue;
7383 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7384 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7385 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7386}
7387
7388
7389/**
7390 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7391 *
7392 * This does not require stack storage as we know the value and can always
7393 * reload it. Loading is postponed till needed.
7394 *
7395 * @param pReNative The recompiler state.
7396 * @param idxVar The variable. Unpacked.
7397 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7398 *
7399 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7400 * @internal
7401 */
7402static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7403{
7404 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7405 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7406
7407 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7408 {
7409 /* Only simple transitions for now. */
7410 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7411 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7412 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7413 }
7414 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7415
7416 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7417
7418 /* Update the other variable, ensure it's a stack variable. */
7419 /** @todo handle variables with const values... that'll go boom now. */
7420 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7421 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7422}
7423
7424
7425/**
7426 * Sets the variable to a reference (pointer) to a guest register reference.
7427 *
7428 * This does not require stack storage as we know the value and can always
7429 * reload it. Loading is postponed till needed.
7430 *
7431 * @param pReNative The recompiler state.
7432 * @param idxVar The variable.
7433 * @param enmRegClass The class of guest registers to reference.
7434 * @param idxReg The register within @a enmRegClass to reference.
7435 *
7436 * @throws VERR_IEM_VAR_IPE_2
7437 */
7438DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7439 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7440{
7441 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7442 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7443
7444 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7445 {
7446 /* Only simple transitions for now. */
7447 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7448 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7449 }
7450 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7451
7452 pVar->u.GstRegRef.enmClass = enmRegClass;
7453 pVar->u.GstRegRef.idx = idxReg;
7454}
7455
7456
7457DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7458{
7459 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7460}
7461
7462
7463DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7464{
7465 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7466
7467 /* Since we're using a generic uint64_t value type, we must truncate it if
7468 the variable is smaller, otherwise we may end up with a too large value when
7469 scaling up an imm8 w/ sign-extension.
7470
7471 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7472 in the bios, bx=1) when running on arm, because clang expect 16-bit
7473 register parameters to have bits 16 and up set to zero. Instead of
7474 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7475 CF value in the result. */
7476 switch (cbType)
7477 {
7478 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7479 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7480 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7481 }
7482 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7483 return idxVar;
7484}
7485
7486
7487DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7488{
7489 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7490 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7491 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7492 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7493 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7494 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7495
7496 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7497 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7498 return idxArgVar;
7499}
7500
7501
7502DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7503{
7504 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7505 /* Don't set to stack now, leave that to the first use as for instance
7506 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7507 return idxVar;
7508}
7509
7510
7511DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7512{
7513 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7514
7515 /* Since we're using a generic uint64_t value type, we must truncate it if
7516 the variable is smaller, otherwise we may end up with a too large value when
7517 scaling up an imm8 w/ sign-extension. */
7518 switch (cbType)
7519 {
7520 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7521 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7522 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7523 }
7524 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7525 return idxVar;
7526}
7527
7528
7529/**
7530 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7531 * fixed till we call iemNativeVarRegisterRelease.
7532 *
7533 * @returns The host register number.
7534 * @param pReNative The recompiler state.
7535 * @param idxVar The variable.
7536 * @param poff Pointer to the instruction buffer offset.
7537 * In case a register needs to be freed up or the value
7538 * loaded off the stack.
7539 * @param fInitialized Set if the variable must already have been initialized.
7540 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7541 * the case.
7542 * @param idxRegPref Preferred register number or UINT8_MAX.
7543 */
7544DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7545 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7546{
7547 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7548 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7549 Assert(pVar->cbVar <= 8);
7550 Assert(!pVar->fRegAcquired);
7551
7552 uint8_t idxReg = pVar->idxReg;
7553 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7554 {
7555 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7556 && pVar->enmKind < kIemNativeVarKind_End);
7557 pVar->fRegAcquired = true;
7558 return idxReg;
7559 }
7560
7561 /*
7562 * If the kind of variable has not yet been set, default to 'stack'.
7563 */
7564 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7565 && pVar->enmKind < kIemNativeVarKind_End);
7566 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7567 iemNativeVarSetKindToStack(pReNative, idxVar);
7568
7569 /*
7570 * We have to allocate a register for the variable, even if it's a stack one,
7571 * as we don't know if there are modifications being made to it before it's
7572 * finalized (todo: analyze and insert hints about that?).
7573 *
7574 * If we can, we try to get the correct register for argument variables. This
7575 * assumes that most argument variables are fetched as close as possible
7576 * to the actual call, so that there aren't any interfering hidden calls
7577 * (memory accesses, etc.) in between.
7578 *
7579 * If we cannot, or if it's a plain (non-argument) variable, we make sure no
7580 * argument registers that will be used by this MC block are allocated here,
7581 * and we always prefer non-volatile registers to avoid needing to spill
7582 * stuff for internal calls.
7583 */
7584 /** @todo Detect too early argument value fetches and warn about hidden
7585 * calls causing less optimal code to be generated in the python script. */
7586
7587 uint8_t const uArgNo = pVar->uArgNo;
7588 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7589 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7590 {
7591 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7592 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7593 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7594 }
7595 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7596 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7597 {
7598 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7599 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7600 & ~pReNative->Core.bmHstRegsWithGstShadow
7601 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7602 & fNotArgsMask;
7603 if (fRegs)
7604 {
7605 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7606 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7607 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7608 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7609 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7610 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7611 }
7612 else
7613 {
7614 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7615 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7616 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7617 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7618 }
7619 }
7620 else
7621 {
7622 idxReg = idxRegPref;
7623 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7624 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7625 }
7626 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7627 pVar->idxReg = idxReg;
7628
7629#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7630 pVar->fSimdReg = false;
7631#endif
7632
7633 /*
7634 * Load it off the stack if we've got a stack slot.
7635 */
7636 uint8_t const idxStackSlot = pVar->idxStackSlot;
7637 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7638 {
7639 Assert(fInitialized);
7640 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7641 switch (pVar->cbVar)
7642 {
7643 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7644 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7645 case 3: AssertFailed(); RT_FALL_THRU();
7646 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7647 default: AssertFailed(); RT_FALL_THRU();
7648 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7649 }
7650 }
7651 else
7652 {
7653 Assert(idxStackSlot == UINT8_MAX);
7654 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7655 }
7656 pVar->fRegAcquired = true;
7657 return idxReg;
7658}
7659
7660
7661#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7662/**
7663 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7664 * fixed till we call iemNativeVarRegisterRelease.
7665 *
7666 * @returns The host register number.
7667 * @param pReNative The recompiler state.
7668 * @param idxVar The variable.
7669 * @param poff Pointer to the instruction buffer offset.
7670 * In case a register needs to be freed up or the value
7671 * loaded off the stack.
7672 * @param fInitialized Set if the variable must already have been initialized.
7673 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7674 * the case.
7675 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7676 */
7677DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7678 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7679{
7680 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7681 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7682 Assert( pVar->cbVar == sizeof(RTUINT128U)
7683 || pVar->cbVar == sizeof(RTUINT256U));
7684 Assert(!pVar->fRegAcquired);
7685
7686 uint8_t idxReg = pVar->idxReg;
7687 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7688 {
7689 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7690 && pVar->enmKind < kIemNativeVarKind_End);
7691 pVar->fRegAcquired = true;
7692 return idxReg;
7693 }
7694
7695 /*
7696 * If the kind of variable has not yet been set, default to 'stack'.
7697 */
7698 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7699 && pVar->enmKind < kIemNativeVarKind_End);
7700 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7701 iemNativeVarSetKindToStack(pReNative, idxVar);
7702
7703 /*
7704 * We have to allocate a register for the variable, even if it's a stack one,
7705 * as we don't know if there are modifications being made to it before it's
7706 * finalized (todo: analyze and insert hints about that?).
7707 *
7708 * If we can, we try to get the correct register for argument variables. This
7709 * assumes that most argument variables are fetched as close as possible
7710 * to the actual call, so that there aren't any interfering hidden calls
7711 * (memory accesses, etc.) in between.
7712 *
7713 * If we cannot, or if it's a plain (non-argument) variable, we make sure no
7714 * argument registers that will be used by this MC block are allocated here,
7715 * and we always prefer non-volatile registers to avoid needing to spill
7716 * stuff for internal calls.
7717 */
7718 /** @todo Detect too early argument value fetches and warn about hidden
7719 * calls causing less optimal code to be generated in the python script. */
7720
7721 uint8_t const uArgNo = pVar->uArgNo;
7722 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7723
7724 /* SIMD is a bit simpler for now because there is no support for arguments. */
7725 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7726 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7727 {
7728 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7729 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7730 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7731 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7732 & fNotArgsMask;
7733 if (fRegs)
7734 {
7735 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7736 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7737 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7738 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7739 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7740 }
7741 else
7742 {
7743 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7744 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7745 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7746 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7747 }
7748 }
7749 else
7750 {
7751 idxReg = idxRegPref;
7752 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7753 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7754 }
7755 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7756
7757 pVar->fSimdReg = true;
7758 pVar->idxReg = idxReg;
7759
7760 /*
7761 * Load it off the stack if we've got a stack slot.
7762 */
7763 uint8_t const idxStackSlot = pVar->idxStackSlot;
7764 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7765 {
7766 Assert(fInitialized);
7767 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7768 switch (pVar->cbVar)
7769 {
7770 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7771 default: AssertFailed(); RT_FALL_THRU();
7772 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7773 }
7774 }
7775 else
7776 {
7777 Assert(idxStackSlot == UINT8_MAX);
7778 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7779 }
7780 pVar->fRegAcquired = true;
7781 return idxReg;
7782}
7783#endif
7784
7785
7786/**
7787 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7788 * guest register.
7789 *
7790 * This function makes sure there is a register for it and sets it to be the
7791 * current shadow copy of @a enmGstReg.
7792 *
7793 * @returns The host register number.
7794 * @param pReNative The recompiler state.
7795 * @param idxVar The variable.
7796 * @param enmGstReg The guest register this variable will be written to
7797 * after this call.
7798 * @param poff Pointer to the instruction buffer offset.
7799 * In case a register needs to be freed up or if the
7800 * variable content needs to be loaded off the stack.
7801 *
7802 * @note We DO NOT expect @a idxVar to be an argument variable,
7803 * because we can only be in the commit stage of an instruction when this
7804 * function is used.
7805 */
7806DECL_HIDDEN_THROW(uint8_t)
7807iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7808{
7809 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7810 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7811 Assert(!pVar->fRegAcquired);
7812 AssertMsgStmt( pVar->cbVar <= 8
7813 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7814 || pVar->enmKind == kIemNativeVarKind_Stack),
7815 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7816 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7817 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7818
7819 /*
7820 * This shouldn't ever be used for arguments, unless it's in a weird else
7821 * branch that doesn't do any calling and even then it's questionable.
7822 *
7823 * However, in case someone writes crazy wrong MC code and does register
7824 * updates before making calls, just use the regular register allocator to
7825 * ensure we get a register suitable for the intended argument number.
7826 */
7827 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7828
7829 /*
7830 * If there is already a register for the variable, we transfer/set the
7831 * guest shadow copy assignment to it.
7832 */
7833 uint8_t idxReg = pVar->idxReg;
7834 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7835 {
7836 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7837 {
7838 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7839 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7840 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7841 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7842 }
7843 else
7844 {
7845 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7846 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7847 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7848 }
7849 /** @todo figure this one out. We need some way of making sure the register isn't
7850 * modified after this point, just in case we start writing crappy MC code. */
7851 pVar->enmGstReg = enmGstReg;
7852 pVar->fRegAcquired = true;
7853 return idxReg;
7854 }
7855 Assert(pVar->uArgNo == UINT8_MAX);
7856
7857 /*
7858 * Because this is supposed to be the commit stage, we just tag along with the
7859 * temporary register allocator and upgrade it to a variable register.
7860 */
7861 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7862 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7863 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7864 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7865 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7866 pVar->idxReg = idxReg;
7867
7868 /*
7869 * Now we need to load the register value.
7870 */
7871 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7872 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7873 else
7874 {
7875 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7876 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7877 switch (pVar->cbVar)
7878 {
7879 case sizeof(uint64_t):
7880 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7881 break;
7882 case sizeof(uint32_t):
7883 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7884 break;
7885 case sizeof(uint16_t):
7886 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7887 break;
7888 case sizeof(uint8_t):
7889 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7890 break;
7891 default:
7892 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7893 }
7894 }
7895
7896 pVar->fRegAcquired = true;
7897 return idxReg;
7898}
7899
7900
7901/**
7902 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7903 *
7904 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7905 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7906 * requirement of flushing anything in volatile host registers when making a
7907 * call.
7908 *
7909 * @returns New @a off value.
7910 * @param pReNative The recompiler state.
7911 * @param off The code buffer position.
7912 * @param fHstRegsNotToSave Set of registers not to save & restore.
7913 */
7914DECL_HIDDEN_THROW(uint32_t)
7915iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7916{
7917 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7918 if (fHstRegs)
7919 {
7920 do
7921 {
7922 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7923 fHstRegs &= ~RT_BIT_32(idxHstReg);
7924
7925 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7926 {
7927 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7928 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7929 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7930 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7931 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7932 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7933 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7934 {
7935 case kIemNativeVarKind_Stack:
7936 {
7937 /* Temporarily spill the variable register. */
7938 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7939 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7940 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7941 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7942 continue;
7943 }
7944
7945 case kIemNativeVarKind_Immediate:
7946 case kIemNativeVarKind_VarRef:
7947 case kIemNativeVarKind_GstRegRef:
7948 /* It is weird to have any of these loaded at this point. */
7949 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7950 continue;
7951
7952 case kIemNativeVarKind_End:
7953 case kIemNativeVarKind_Invalid:
7954 break;
7955 }
7956 AssertFailed();
7957 }
7958 else
7959 {
7960 /*
7961 * Allocate a temporary stack slot and spill the register to it.
7962 */
7963 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7964 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7965 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7966 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7967 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7968 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7969 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7970 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7971 }
7972 } while (fHstRegs);
7973 }
7974#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7975 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7976 if (fHstRegs)
7977 {
7978 do
7979 {
7980 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7981 fHstRegs &= ~RT_BIT_32(idxHstReg);
7982
7983 /*
7984 * Guest registers are flushed to CPUMCTX at the moment, so they don't need a stack slot
7985 * allocated, which would be more difficult anyway due to spanning multiple stack slots and
7986 * different sizes (besides, we only have a limited number of slots at the moment). Fixed
7987 * temporary registers don't need saving.
7988 */
7989 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7990 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7991 continue;
7992
7993 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7994
7995 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7996 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7997 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7998 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7999 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8000 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8001 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8002 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8003 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8004 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8005 {
8006 case kIemNativeVarKind_Stack:
8007 {
8008 /* Temporarily spill the variable register. */
8009 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8010 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8011 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8012 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8013 if (cbVar == sizeof(RTUINT128U))
8014 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8015 else
8016 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8017 continue;
8018 }
8019
8020 case kIemNativeVarKind_Immediate:
8021 case kIemNativeVarKind_VarRef:
8022 case kIemNativeVarKind_GstRegRef:
8023 /* It is weird to have any of these loaded at this point. */
8024 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8025 continue;
8026
8027 case kIemNativeVarKind_End:
8028 case kIemNativeVarKind_Invalid:
8029 break;
8030 }
8031 AssertFailed();
8032 } while (fHstRegs);
8033 }
8034#endif
8035 return off;
8036}
8037
8038
8039/**
8040 * Emit code to restore volatile registers after a call to a helper.
8041 *
8042 * @returns New @a off value.
8043 * @param pReNative The recompiler state.
8044 * @param off The code buffer position.
8045 * @param fHstRegsNotToSave Set of registers not to save & restore.
8046 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8047 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8048 */
8049DECL_HIDDEN_THROW(uint32_t)
8050iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8051{
8052 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8053 if (fHstRegs)
8054 {
8055 do
8056 {
8057 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8058 fHstRegs &= ~RT_BIT_32(idxHstReg);
8059
8060 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8061 {
8062 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8063 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8064 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8065 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8066 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8067 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8068 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8069 {
8070 case kIemNativeVarKind_Stack:
8071 {
8072 /* Unspill the variable register. */
8073 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8074 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8075 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8076 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8077 continue;
8078 }
8079
8080 case kIemNativeVarKind_Immediate:
8081 case kIemNativeVarKind_VarRef:
8082 case kIemNativeVarKind_GstRegRef:
8083 /* It is weird to have any of these loaded at this point. */
8084 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8085 continue;
8086
8087 case kIemNativeVarKind_End:
8088 case kIemNativeVarKind_Invalid:
8089 break;
8090 }
8091 AssertFailed();
8092 }
8093 else
8094 {
8095 /*
8096 * Restore from temporary stack slot.
8097 */
8098 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8099 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8100 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8101 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8102
8103 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8104 }
8105 } while (fHstRegs);
8106 }
8107#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8108 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8109 if (fHstRegs)
8110 {
8111 do
8112 {
8113 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8114 fHstRegs &= ~RT_BIT_32(idxHstReg);
8115
8116 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8117 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8118 continue;
8119 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8120
8121 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8122 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8123 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8124 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8125 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8126 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8127 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8128 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8129 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8130 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8131 {
8132 case kIemNativeVarKind_Stack:
8133 {
8134 /* Unspill the variable register. */
8135 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8136 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8137 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8138 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8139
8140 if (cbVar == sizeof(RTUINT128U))
8141 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8142 else
8143 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8144 continue;
8145 }
8146
8147 case kIemNativeVarKind_Immediate:
8148 case kIemNativeVarKind_VarRef:
8149 case kIemNativeVarKind_GstRegRef:
8150 /* It is weird to have any of these loaded at this point. */
8151 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8152 continue;
8153
8154 case kIemNativeVarKind_End:
8155 case kIemNativeVarKind_Invalid:
8156 break;
8157 }
8158 AssertFailed();
8159 } while (fHstRegs);
8160 }
8161#endif
8162 return off;
8163}
8164
8165
8166/**
8167 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
8168 *
8169 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8170 *
8171 * ASSUMES that @a idxVar is valid and unpacked.
8172 */
8173DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8174{
8175 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8176 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8177 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8178 {
8179 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8180 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8181 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
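 /* E.g. a 32 byte variable occupies cSlots=4 slots and is freed with fAllocMask=0xf. */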
8182 Assert(cSlots > 0);
8183 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8184 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8185 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8186 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8187 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8188 }
8189 else
8190 Assert(idxStackSlot == UINT8_MAX);
8191}
8192
8193
8194/**
8195 * Worker that frees a single variable.
8196 *
8197 * ASSUMES that @a idxVar is valid and unpacked.
8198 */
8199DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8200{
8201 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8202 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8203 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8204
8205 /* Free the host register first if any assigned. */
8206 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8208 if ( idxHstReg != UINT8_MAX
8209 && pReNative->Core.aVars[idxVar].fSimdReg)
8210 {
8211 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8212 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8213 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8214 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8215 }
8216 else
8217#endif
8218 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8219 {
8220 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8221 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8222 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8223 }
8224
8225 /* Free argument mapping. */
8226 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8227 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8228 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8229
8230 /* Free the stack slots. */
8231 iemNativeVarFreeStackSlots(pReNative, idxVar);
8232
8233 /* Free the actual variable. */
8234 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8235 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8236}
8237
8238
8239/**
8240 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8241 */
8242DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8243{
8244 while (bmVars != 0)
8245 {
8246 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8247 bmVars &= ~RT_BIT_32(idxVar);
8248
8249#if 1 /** @todo optimize by simplifying this later... */
8250 iemNativeVarFreeOneWorker(pReNative, idxVar);
8251#else
8252 /* Only need to free the host register, the rest is done as bulk updates below. */
8253 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8254 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8255 {
8256 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8257 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8258 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8259 }
8260#endif
8261 }
8262#if 0 /** @todo optimize by simplifying this later... */
8263 pReNative->Core.bmVars = 0;
8264 pReNative->Core.bmStack = 0;
8265 pReNative->Core.u64ArgVars = UINT64_MAX;
8266#endif
8267}
8268
8269
8270
8271/*********************************************************************************************************************************
8272* Emitters for IEM_MC_CALL_CIMPL_XXX *
8273*********************************************************************************************************************************/
8274
8275/**
8276 * Emits code to load a reference to the given guest register into @a idxGprDst.
8277 */
8278DECL_HIDDEN_THROW(uint32_t)
8279iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8280 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8281{
8282#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8283 /** @todo If we're ever going to allow referencing the RIP register, we need to update the guest value here. */
8284#endif
8285
8286 /*
8287 * Get the offset relative to the CPUMCTX structure.
8288 */
8289 uint32_t offCpumCtx;
8290 switch (enmClass)
8291 {
8292 case kIemNativeGstRegRef_Gpr:
8293 Assert(idxRegInClass < 16);
8294 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8295 break;
8296
8297 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8298 Assert(idxRegInClass < 4);
8299 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
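 /* AH/CH/DH/BH live in the second byte of GPRs 0..3, hence the bHi offset plus
 indexing by whole CPUMCTXGREG entries. */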
8300 break;
8301
8302 case kIemNativeGstRegRef_EFlags:
8303 Assert(idxRegInClass == 0);
8304 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8305 break;
8306
8307 case kIemNativeGstRegRef_MxCsr:
8308 Assert(idxRegInClass == 0);
8309 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8310 break;
8311
8312 case kIemNativeGstRegRef_FpuReg:
8313 Assert(idxRegInClass < 8);
8314 AssertFailed(); /** @todo what kind of indexing? */
8315 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8316 break;
8317
8318 case kIemNativeGstRegRef_MReg:
8319 Assert(idxRegInClass < 8);
8320 AssertFailed(); /** @todo what kind of indexing? */
8321 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8322 break;
8323
8324 case kIemNativeGstRegRef_XReg:
8325 Assert(idxRegInClass < 16);
8326 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8327 break;
8328
8329 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8330 Assert(idxRegInClass == 0);
8331 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8332 break;
8333
8334 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8335 Assert(idxRegInClass == 0);
8336 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8337 break;
8338
8339 default:
8340 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8341 }
8342
8343 /*
8344     * Load the address into the destination register.
8345 */
8346#ifdef RT_ARCH_AMD64
8347 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8348
8349#elif defined(RT_ARCH_ARM64)
8350 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8351 Assert(offCpumCtx < 4096);
8352 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8353
8354#else
8355# error "Port me!"
8356#endif
8357
8358 return off;
8359}
8360
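/**
 * Illustration: a rough sketch (not the actual emitter code) of how a caller
 * could use iemNativeEmitLeaGprByGstRegRef() above to get a host pointer to a
 * guest GPR.  The temporary register allocation shown here is assumed; the
 * real users are the IEM_MC_REF_XXX style emitters.
 *
 * @code
 *      // Grab a free host register for the result (assumed helper usage).
 *      uint8_t const idxRegPtr = iemNativeRegAllocTmp(pReNative, &off);
 *
 *      // AMD64: lea idxRegPtr, [pVCpu + offsetof cpum.GstCtx.aGRegs[xCX]]
 *      // ARM64: add idxRegPtr, x27 (PCPUMCTX), #offsetof aGRegs[xCX]
 *      off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxRegPtr,
 *                                           kIemNativeGstRegRef_Gpr, X86_GREG_xCX);
 * @endcode
 */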
8361
8362/**
8363 * Common code for CIMPL and AIMPL calls.
8364 *
8365 * These are calls that use argument variables and such. They should not be
8366 * confused with internal calls required to implement an MC operation,
8367 * like a TLB load and similar.
8368 *
8369 * Upon return all that is left to do is to load any hidden arguments and
8370 * perform the call. All argument variables are freed.
8371 *
8372 * @returns New code buffer offset; throws VBox status code on error.
8373 * @param pReNative The native recompile state.
8374 * @param off The code buffer offset.
8375 * @param cArgs The total number of arguments (includes hidden
8376 * count).
8377 * @param cHiddenArgs The number of hidden arguments. The hidden
8378 * arguments must not have any variable declared for
8379 * them, whereas all the regular arguments must
8380 * (tstIEMCheckMc ensures this).
8381 */
8382DECL_HIDDEN_THROW(uint32_t)
8383iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8384{
8385#ifdef VBOX_STRICT
8386 /*
8387 * Assert sanity.
8388 */
8389 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8390 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8391 for (unsigned i = 0; i < cHiddenArgs; i++)
8392 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8393 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8394 {
8395 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8396 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8397 }
8398 iemNativeRegAssertSanity(pReNative);
8399#endif
8400
8401 /* We don't know what the called function makes use of, so flush any pending register writes. */
8402 off = iemNativeRegFlushPendingWrites(pReNative, off);
8403
8404 /*
8405 * Before we do anything else, go over variables that are referenced and
8406 * make sure they are not in a register.
8407 */
8408 uint32_t bmVars = pReNative->Core.bmVars;
8409 if (bmVars)
8410 {
8411 do
8412 {
8413 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8414 bmVars &= ~RT_BIT_32(idxVar);
8415
8416 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8417 {
8418 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8419#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8420 if ( idxRegOld != UINT8_MAX
8421 && pReNative->Core.aVars[idxVar].fSimdReg)
8422 {
8423 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8424 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8425
8426 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8427 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8428 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8429 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8430 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8431 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8432 else
8433 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8434
8435 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8436 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8437
8438 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8439 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8440 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8441 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8442 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8443 }
8444 else
8445#endif
8446 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8447 {
8448 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8449 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8450 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8451 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8452 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8453
8454 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8455 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8456 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8457 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8458 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8459 }
8460 }
8461 } while (bmVars != 0);
8462#if 0 //def VBOX_STRICT
8463 iemNativeRegAssertSanity(pReNative);
8464#endif
8465 }
8466
8467 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8468
8469 /*
8470 * First, go over the host registers that will be used for arguments and make
8471 * sure they either hold the desired argument or are free.
8472 */
8473 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8474 {
8475 for (uint32_t i = 0; i < cRegArgs; i++)
8476 {
8477 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8478 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8479 {
8480 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8481 {
8482 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8483 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8484 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8485 Assert(pVar->idxReg == idxArgReg);
8486 uint8_t const uArgNo = pVar->uArgNo;
8487 if (uArgNo == i)
8488 { /* perfect */ }
8489 /* The variable allocator logic should make sure this is impossible,
8490 except for when the return register is used as a parameter (ARM,
8491 but not x86). */
8492#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8493 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8494 {
8495# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8496# error "Implement this"
8497# endif
8498 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8499 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8500 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8501 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8502 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8503 }
8504#endif
8505 else
8506 {
8507 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8508
8509 if (pVar->enmKind == kIemNativeVarKind_Stack)
8510 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8511 else
8512 {
8513 /* just free it, can be reloaded if used again */
8514 pVar->idxReg = UINT8_MAX;
8515 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8516 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8517 }
8518 }
8519 }
8520 else
8521 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8522 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8523 }
8524 }
8525#if 0 //def VBOX_STRICT
8526 iemNativeRegAssertSanity(pReNative);
8527#endif
8528 }
8529
8530 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8531
8532#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8533 /*
8534 * If there are any stack arguments, make sure they are in their place as well.
8535 *
8536 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8537 * the caller) will be loading it later and it must be free (see the first loop).
8538 */
8539 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8540 {
8541 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8542 {
8543 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8544 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8545 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8546 {
8547 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8548 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8549 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8550 pVar->idxReg = UINT8_MAX;
8551 }
8552 else
8553 {
8554 /* Use ARG0 as temp for stuff we need registers for. */
8555 switch (pVar->enmKind)
8556 {
8557 case kIemNativeVarKind_Stack:
8558 {
8559 uint8_t const idxStackSlot = pVar->idxStackSlot;
8560 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8561 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8562 iemNativeStackCalcBpDisp(idxStackSlot));
8563 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8564 continue;
8565 }
8566
8567 case kIemNativeVarKind_Immediate:
8568 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8569 continue;
8570
8571 case kIemNativeVarKind_VarRef:
8572 {
8573 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8574 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8575 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8576 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8577 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8578# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8579 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8580 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8581 if ( fSimdReg
8582 && idxRegOther != UINT8_MAX)
8583 {
8584 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8585 if (cbVar == sizeof(RTUINT128U))
8586 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8587 else
8588 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8589 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8590 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8591 }
8592 else
8593# endif
8594 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8595 {
8596 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8597 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8598 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8599 }
8600 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8601 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8602 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8603 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8604 continue;
8605 }
8606
8607 case kIemNativeVarKind_GstRegRef:
8608 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8609 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8610 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8611 continue;
8612
8613 case kIemNativeVarKind_Invalid:
8614 case kIemNativeVarKind_End:
8615 break;
8616 }
8617 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8618 }
8619 }
8620# if 0 //def VBOX_STRICT
8621 iemNativeRegAssertSanity(pReNative);
8622# endif
8623 }
8624#else
8625 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8626#endif
8627
8628 /*
8629 * Make sure the argument variables are loaded into their respective registers.
8630 *
8631 * We can optimize this by ASSUMING that any register allocations are for
8632 * registers that have already been loaded and are ready. The previous step
8633 * saw to that.
8634 */
8635 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8636 {
8637 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8638 {
8639 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8640 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8641 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8642 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8643 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8644 else
8645 {
8646 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8647 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8648 {
8649 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8650 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8651 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8652 | RT_BIT_32(idxArgReg);
8653 pVar->idxReg = idxArgReg;
8654 }
8655 else
8656 {
8657 /* Use ARG0 as temp for stuff we need registers for. */
8658 switch (pVar->enmKind)
8659 {
8660 case kIemNativeVarKind_Stack:
8661 {
8662 uint8_t const idxStackSlot = pVar->idxStackSlot;
8663 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8664 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8665 continue;
8666 }
8667
8668 case kIemNativeVarKind_Immediate:
8669 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8670 continue;
8671
8672 case kIemNativeVarKind_VarRef:
8673 {
8674 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8675 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8676 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8677 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8678 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8679 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8680#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8681 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8682 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8683 if ( fSimdReg
8684 && idxRegOther != UINT8_MAX)
8685 {
8686 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8687 if (cbVar == sizeof(RTUINT128U))
8688 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8689 else
8690 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8691 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8692 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8693 }
8694 else
8695#endif
8696 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8697 {
8698 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8699 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8700 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8701 }
8702 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8703 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8704 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8705 continue;
8706 }
8707
8708 case kIemNativeVarKind_GstRegRef:
8709 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8710 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8711 continue;
8712
8713 case kIemNativeVarKind_Invalid:
8714 case kIemNativeVarKind_End:
8715 break;
8716 }
8717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8718 }
8719 }
8720 }
8721#if 0 //def VBOX_STRICT
8722 iemNativeRegAssertSanity(pReNative);
8723#endif
8724 }
8725#ifdef VBOX_STRICT
8726 else
8727 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8728 {
8729 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8730 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8731 }
8732#endif
8733
8734 /*
8735 * Free all argument variables (simplified).
8736 * Their lifetime always expires with the call they are for.
8737 */
8738 /** @todo Make the python script check that arguments aren't used after
8739 * IEM_MC_CALL_XXXX. */
8740 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8741 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8742 * an argument value. There is also some FPU stuff. */
8743 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8744 {
8745 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8746 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8747
8748 /* no need to free registers: */
8749 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8750 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8751 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8752 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8753 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8754 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8755
8756 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8757 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8758 iemNativeVarFreeStackSlots(pReNative, idxVar);
8759 }
8760 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8761
8762 /*
8763 * Flush volatile registers as we make the call.
8764 */
8765 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8766
8767 return off;
8768}
8769
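/**
 * Illustration: an outline of the sequence the IEM_MC_CALL_CIMPL_XXX emitters
 * follow around iemNativeEmitCallCommon().  The hidden argument loading and
 * the call/return-check helpers named below are assumptions based on the
 * surrounding code; treat this as a sketch, not the implementation.
 *
 * @code
 *      // Put the declared argument variables into place and free them.
 *      off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
 *
 *      // Load the hidden arguments: pVCpu and the instruction length.
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
 *
 *      // Make the call and route a non-zero status to the NonZeroRetOrPassUp handling.
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
 *      off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
 * @endcode
 */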
8770
8771
8772/*********************************************************************************************************************************
8773* TLB Lookup. *
8774*********************************************************************************************************************************/
8775
8776/**
8777 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8778 */
8779DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8780{
8781 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8782 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8783 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8784 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8785
8786 /* Do the lookup manually. */
8787 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8788 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8789 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8790 if (RT_LIKELY(pTlbe->uTag == uTag))
8791 {
8792 /*
8793 * Check TLB page table level access flags.
8794 */
8795 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8796 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8797 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8798 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8799 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8800 | IEMTLBE_F_PG_UNASSIGNED
8801 | IEMTLBE_F_PT_NO_ACCESSED
8802 | fNoWriteNoDirty | fNoUser);
8803 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8804 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8805 {
8806 /*
8807 * Return the address.
8808 */
8809 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8810 if ((uintptr_t)pbAddr == uResult)
8811 return;
8812 RT_NOREF(cbMem);
8813 AssertFailed();
8814 }
8815 else
8816 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8817 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8818 }
8819 else
8820 AssertFailed();
8821 RT_BREAKPOINT();
8822}
8823
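/**
 * Illustration: the manual lookup above relies on the IEMTLB_CALC_TAG and
 * IEMTLB_TAG_TO_ENTRY macros from IEMInternal.h.  Conceptually (the exact
 * shifting, canonical address handling and table size are defined there, not
 * here) they boil down to something along these lines:
 *
 * @code
 *      // Page number of the flat address, qualified by the TLB revision so
 *      // entries from before the last flush can never match.
 *      uint64_t const uTag  = (GCPtrFlat >> GUEST_PAGE_SHIFT) | pVCpu->iem.s.DataTlb.uTlbRevision;
 *      // The low tag bits index the power-of-two sized entry table.
 *      PIEMTLBENTRY   pTlbe = &pVCpu->iem.s.DataTlb.aEntries[uTag & (RT_ELEMENTS(pVCpu->iem.s.DataTlb.aEntries) - 1)];
 * @endcode
 */
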
8824/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8825
8826
8827
8828/*********************************************************************************************************************************
8829* Recompiler Core. *
8830*********************************************************************************************************************************/
8831
8832/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8833static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8834{
8835 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8836 pDis->cbCachedInstr += cbMaxRead;
8837 RT_NOREF(cbMinRead);
8838 return VERR_NO_DATA;
8839}
8840
8841
8842DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8843{
8844 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8845 {
8846#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8847 ENTRY(fLocalForcedActions),
8848 ENTRY(iem.s.rcPassUp),
8849 ENTRY(iem.s.fExec),
8850 ENTRY(iem.s.pbInstrBuf),
8851 ENTRY(iem.s.uInstrBufPc),
8852 ENTRY(iem.s.GCPhysInstrBuf),
8853 ENTRY(iem.s.cbInstrBufTotal),
8854 ENTRY(iem.s.idxTbCurInstr),
8855#ifdef VBOX_WITH_STATISTICS
8856 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8857 ENTRY(iem.s.StatNativeTlbHitsForStore),
8858 ENTRY(iem.s.StatNativeTlbHitsForStack),
8859 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8860 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8861 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8862 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8863 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8864#endif
8865 ENTRY(iem.s.DataTlb.aEntries),
8866 ENTRY(iem.s.DataTlb.uTlbRevision),
8867 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8868 ENTRY(iem.s.DataTlb.cTlbHits),
8869 ENTRY(iem.s.CodeTlb.aEntries),
8870 ENTRY(iem.s.CodeTlb.uTlbRevision),
8871 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8872 ENTRY(iem.s.CodeTlb.cTlbHits),
8873 ENTRY(pVMR3),
8874 ENTRY(cpum.GstCtx.rax),
8875 ENTRY(cpum.GstCtx.ah),
8876 ENTRY(cpum.GstCtx.rcx),
8877 ENTRY(cpum.GstCtx.ch),
8878 ENTRY(cpum.GstCtx.rdx),
8879 ENTRY(cpum.GstCtx.dh),
8880 ENTRY(cpum.GstCtx.rbx),
8881 ENTRY(cpum.GstCtx.bh),
8882 ENTRY(cpum.GstCtx.rsp),
8883 ENTRY(cpum.GstCtx.rbp),
8884 ENTRY(cpum.GstCtx.rsi),
8885 ENTRY(cpum.GstCtx.rdi),
8886 ENTRY(cpum.GstCtx.r8),
8887 ENTRY(cpum.GstCtx.r9),
8888 ENTRY(cpum.GstCtx.r10),
8889 ENTRY(cpum.GstCtx.r11),
8890 ENTRY(cpum.GstCtx.r12),
8891 ENTRY(cpum.GstCtx.r13),
8892 ENTRY(cpum.GstCtx.r14),
8893 ENTRY(cpum.GstCtx.r15),
8894 ENTRY(cpum.GstCtx.es.Sel),
8895 ENTRY(cpum.GstCtx.es.u64Base),
8896 ENTRY(cpum.GstCtx.es.u32Limit),
8897 ENTRY(cpum.GstCtx.es.Attr),
8898 ENTRY(cpum.GstCtx.cs.Sel),
8899 ENTRY(cpum.GstCtx.cs.u64Base),
8900 ENTRY(cpum.GstCtx.cs.u32Limit),
8901 ENTRY(cpum.GstCtx.cs.Attr),
8902 ENTRY(cpum.GstCtx.ss.Sel),
8903 ENTRY(cpum.GstCtx.ss.u64Base),
8904 ENTRY(cpum.GstCtx.ss.u32Limit),
8905 ENTRY(cpum.GstCtx.ss.Attr),
8906 ENTRY(cpum.GstCtx.ds.Sel),
8907 ENTRY(cpum.GstCtx.ds.u64Base),
8908 ENTRY(cpum.GstCtx.ds.u32Limit),
8909 ENTRY(cpum.GstCtx.ds.Attr),
8910 ENTRY(cpum.GstCtx.fs.Sel),
8911 ENTRY(cpum.GstCtx.fs.u64Base),
8912 ENTRY(cpum.GstCtx.fs.u32Limit),
8913 ENTRY(cpum.GstCtx.fs.Attr),
8914 ENTRY(cpum.GstCtx.gs.Sel),
8915 ENTRY(cpum.GstCtx.gs.u64Base),
8916 ENTRY(cpum.GstCtx.gs.u32Limit),
8917 ENTRY(cpum.GstCtx.gs.Attr),
8918 ENTRY(cpum.GstCtx.rip),
8919 ENTRY(cpum.GstCtx.eflags),
8920 ENTRY(cpum.GstCtx.uRipInhibitInt),
8921 ENTRY(cpum.GstCtx.cr0),
8922 ENTRY(cpum.GstCtx.cr4),
8923 ENTRY(cpum.GstCtx.aXcr[0]),
8924 ENTRY(cpum.GstCtx.aXcr[1]),
8925#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8926 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8927 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8928 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8929 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8930 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8931 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8932 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8933 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8934 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8935 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8936 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8937 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8938 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8939 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8940 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8941 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8942 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8943 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8944 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8945 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8946 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8947 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8948 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8949 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8950 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8951 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8952 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8953 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8954 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8955 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8956 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8957 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8958#endif
8959#undef ENTRY
8960 };
8961#ifdef VBOX_STRICT
8962 static bool s_fOrderChecked = false;
8963 if (!s_fOrderChecked)
8964 {
8965 s_fOrderChecked = true;
8966 uint32_t offPrev = s_aMembers[0].off;
8967 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8968 {
8969 Assert(s_aMembers[i].off > offPrev);
8970 offPrev = s_aMembers[i].off;
8971 }
8972 }
8973#endif
8974
8975 /*
8976 * Binary lookup.
8977 */
8978 unsigned iStart = 0;
8979 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8980 for (;;)
8981 {
8982 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8983 uint32_t const offCur = s_aMembers[iCur].off;
8984 if (off < offCur)
8985 {
8986 if (iCur != iStart)
8987 iEnd = iCur;
8988 else
8989 break;
8990 }
8991 else if (off > offCur)
8992 {
8993 if (iCur + 1 < iEnd)
8994 iStart = iCur + 1;
8995 else
8996 break;
8997 }
8998 else
8999 return s_aMembers[iCur].pszName;
9000 }
9001#ifdef VBOX_WITH_STATISTICS
9002 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9003 return "iem.s.acThreadedFuncStats[iFn]";
9004#endif
9005 return NULL;
9006}
9007
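/**
 * Illustration: iemNativeDbgVCpuOffsetToName() is used by the disassembly
 * annotation code below to translate a pVCpu-relative displacement into a
 * field name.  A standalone (hypothetical) use would look like this:
 *
 * @code
 *      const char *pszField = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
 *      // pszField now points to "cpum.GstCtx.rip"; offsets not in the table yield NULL.
 * @endcode
 */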
9008
9009DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9010{
9011 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9012#if defined(RT_ARCH_AMD64)
9013 static const char * const a_apszMarkers[] =
9014 {
9015 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9016 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9017 };
9018#endif
9019
9020 char szDisBuf[512];
9021 DISSTATE Dis;
9022 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9023 uint32_t const cNative = pTb->Native.cInstructions;
9024 uint32_t offNative = 0;
9025#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9026 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9027#endif
9028 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9029 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9030 : DISCPUMODE_64BIT;
9031#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9032 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9033#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9034 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9035#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9036# error "Port me"
9037#else
9038 csh hDisasm = ~(size_t)0;
9039# if defined(RT_ARCH_AMD64)
9040 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9041# elif defined(RT_ARCH_ARM64)
9042 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9043# else
9044# error "Port me"
9045# endif
9046 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9047
9048 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9049 //Assert(rcCs == CS_ERR_OK);
9050#endif
9051
9052 /*
9053 * Print TB info.
9054 */
9055 pHlp->pfnPrintf(pHlp,
9056 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9057 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9058 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9059 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9060#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9061 if (pDbgInfo && pDbgInfo->cEntries > 1)
9062 {
9063 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9064
9065 /*
9066 * This disassembly is driven by the debug info which follows the native
9067 * code and indicates where the code for the next guest instruction starts,
9068 * where labels are and such things.
9069 */
9070 uint32_t idxThreadedCall = 0;
9071 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9072 uint8_t idxRange = UINT8_MAX;
9073 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9074 uint32_t offRange = 0;
9075 uint32_t offOpcodes = 0;
9076 uint32_t const cbOpcodes = pTb->cbOpcodes;
9077 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9078 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9079 uint32_t iDbgEntry = 1;
9080 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9081
9082 while (offNative < cNative)
9083 {
9084 /* If we're at or have passed the point where the next chunk of debug
9085 info starts, process it. */
9086 if (offDbgNativeNext <= offNative)
9087 {
9088 offDbgNativeNext = UINT32_MAX;
9089 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9090 {
9091 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9092 {
9093 case kIemTbDbgEntryType_GuestInstruction:
9094 {
9095 /* Did the exec flag change? */
9096 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9097 {
9098 pHlp->pfnPrintf(pHlp,
9099 " fExec change %#08x -> %#08x %s\n",
9100 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9101 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9102 szDisBuf, sizeof(szDisBuf)));
9103 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9104 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9105 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9106 : DISCPUMODE_64BIT;
9107 }
9108
9109 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9110 where the compilation was aborted before the opcode was recorded and the actual
9111 instruction was translated to a threaded call. This may happen when we run out
9112 of ranges, or when some complicated interrupts/FFs are found to be pending or
9113 similar. So, we just deal with it here rather than in the compiler code as it
9114 is a lot simpler to do here. */
9115 if ( idxRange == UINT8_MAX
9116 || idxRange >= cRanges
9117 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9118 {
9119 idxRange += 1;
9120 if (idxRange < cRanges)
9121 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9122 else
9123 continue;
9124 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9125 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9126 + (pTb->aRanges[idxRange].idxPhysPage == 0
9127 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9128 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9129 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9130 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9131 pTb->aRanges[idxRange].idxPhysPage);
9132 GCPhysPc += offRange;
9133 }
9134
9135 /* Disassemble the instruction. */
9136 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9137 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9138 uint32_t cbInstr = 1;
9139 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9140 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9141 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9142 if (RT_SUCCESS(rc))
9143 {
9144 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9145 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9146 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9147 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9148
9149 static unsigned const s_offMarker = 55;
9150 static char const s_szMarker[] = " ; <--- guest";
9151 if (cch < s_offMarker)
9152 {
9153 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9154 cch = s_offMarker;
9155 }
9156 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9157 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9158
9159 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9160 }
9161 else
9162 {
9163 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9164 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9165 cbInstr = 1;
9166 }
9167 GCPhysPc += cbInstr;
9168 offOpcodes += cbInstr;
9169 offRange += cbInstr;
9170 continue;
9171 }
9172
9173 case kIemTbDbgEntryType_ThreadedCall:
9174 pHlp->pfnPrintf(pHlp,
9175 " Call #%u to %s (%u args) - %s\n",
9176 idxThreadedCall,
9177 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9178 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9179 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9180 idxThreadedCall++;
9181 continue;
9182
9183 case kIemTbDbgEntryType_GuestRegShadowing:
9184 {
9185 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9186 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9187 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9188 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9189 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9190 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9191 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
9192 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9193 else
9194 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9195 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9196 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9197 continue;
9198 }
9199
9200#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9201 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9202 {
9203 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9204 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9205 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9206 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9207 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9208 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9209 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9210 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9211 else
9212 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9213 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9214 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9215 continue;
9216 }
9217#endif
9218
9219 case kIemTbDbgEntryType_Label:
9220 {
9221 const char *pszName = "what_the_fudge";
9222 const char *pszComment = "";
9223 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9224 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9225 {
9226 case kIemNativeLabelType_Return: pszName = "Return"; break;
9227 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9228 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9229 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9230 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9231 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9232 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9233 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9234 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9235 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9236 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9237 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9238 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9239 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9240 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9241 case kIemNativeLabelType_If:
9242 pszName = "If";
9243 fNumbered = true;
9244 break;
9245 case kIemNativeLabelType_Else:
9246 pszName = "Else";
9247 fNumbered = true;
9248 pszComment = " ; regs state restored pre-if-block";
9249 break;
9250 case kIemNativeLabelType_Endif:
9251 pszName = "Endif";
9252 fNumbered = true;
9253 break;
9254 case kIemNativeLabelType_CheckIrq:
9255 pszName = "CheckIrq_CheckVM";
9256 fNumbered = true;
9257 break;
9258 case kIemNativeLabelType_TlbLookup:
9259 pszName = "TlbLookup";
9260 fNumbered = true;
9261 break;
9262 case kIemNativeLabelType_TlbMiss:
9263 pszName = "TlbMiss";
9264 fNumbered = true;
9265 break;
9266 case kIemNativeLabelType_TlbDone:
9267 pszName = "TlbDone";
9268 fNumbered = true;
9269 break;
9270 case kIemNativeLabelType_Invalid:
9271 case kIemNativeLabelType_End:
9272 break;
9273 }
9274 if (fNumbered)
9275 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9276 else
9277 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9278 continue;
9279 }
9280
9281 case kIemTbDbgEntryType_NativeOffset:
9282 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9283 Assert(offDbgNativeNext > offNative);
9284 break;
9285
9286#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9287 case kIemTbDbgEntryType_DelayedPcUpdate:
9288 pHlp->pfnPrintf(pHlp,
9289 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9290 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9291 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9292 continue;
9293#endif
9294
9295 default:
9296 AssertFailed();
9297 }
9298 iDbgEntry++;
9299 break;
9300 }
9301 }
9302
9303 /*
9304 * Disassemble the next native instruction.
9305 */
9306 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9307# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9308 uint32_t cbInstr = sizeof(paNative[0]);
9309 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9310 if (RT_SUCCESS(rc))
9311 {
9312# if defined(RT_ARCH_AMD64)
9313 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9314 {
9315 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9316 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9317 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9318 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9319 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9320 uInfo & 0x8000 ? "recompiled" : "todo");
9321 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9322 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9323 else
9324 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9325 }
9326 else
9327# endif
9328 {
9329 const char *pszAnnotation = NULL;
9330# ifdef RT_ARCH_AMD64
9331 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9332 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9333 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9334 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9335 PCDISOPPARAM pMemOp;
9336 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9337 pMemOp = &Dis.Param1;
9338 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9339 pMemOp = &Dis.Param2;
9340 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9341 pMemOp = &Dis.Param3;
9342 else
9343 pMemOp = NULL;
9344 if ( pMemOp
9345 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9346 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9347 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9348 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9349
9350#elif defined(RT_ARCH_ARM64)
9351 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9352 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9353 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9354# else
9355# error "Port me"
9356# endif
9357 if (pszAnnotation)
9358 {
9359 static unsigned const s_offAnnotation = 55;
9360 size_t const cchAnnotation = strlen(pszAnnotation);
9361 size_t cchDis = strlen(szDisBuf);
9362 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9363 {
9364 if (cchDis < s_offAnnotation)
9365 {
9366 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9367 cchDis = s_offAnnotation;
9368 }
9369 szDisBuf[cchDis++] = ' ';
9370 szDisBuf[cchDis++] = ';';
9371 szDisBuf[cchDis++] = ' ';
9372 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9373 }
9374 }
9375 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9376 }
9377 }
9378 else
9379 {
9380# if defined(RT_ARCH_AMD64)
9381 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9382 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9383# elif defined(RT_ARCH_ARM64)
9384 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9385# else
9386# error "Port me"
9387# endif
9388 cbInstr = sizeof(paNative[0]);
9389 }
9390 offNative += cbInstr / sizeof(paNative[0]);
9391
9392# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9393 cs_insn *pInstr;
9394 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9395 (uintptr_t)pNativeCur, 1, &pInstr);
9396 if (cInstrs > 0)
9397 {
9398 Assert(cInstrs == 1);
9399 const char *pszAnnotation = NULL;
9400# if defined(RT_ARCH_ARM64)
9401 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9402 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9403 {
9404 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9405 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9406 char *psz = strchr(pInstr->op_str, '[');
9407 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9408 {
9409 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9410 int32_t off = -1;
9411 psz += 4;
9412 if (*psz == ']')
9413 off = 0;
9414 else if (*psz == ',')
9415 {
9416 psz = RTStrStripL(psz + 1);
9417 if (*psz == '#')
9418 off = RTStrToInt32(&psz[1]);
9419 /** @todo deal with index registers and LSL as well... */
9420 }
9421 if (off >= 0)
9422 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9423 }
9424 }
9425# endif
9426
9427 size_t const cchOp = strlen(pInstr->op_str);
9428# if defined(RT_ARCH_AMD64)
9429 if (pszAnnotation)
9430 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9431 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9432 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9433 else
9434 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9435 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9436
9437# else
9438 if (pszAnnotation)
9439 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9440 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9441 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9442 else
9443 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9444 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9445# endif
9446 offNative += pInstr->size / sizeof(*pNativeCur);
9447 cs_free(pInstr, cInstrs);
9448 }
9449 else
9450 {
9451# if defined(RT_ARCH_AMD64)
9452 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9453 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9454# else
9455 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9456# endif
9457 offNative++;
9458 }
9459# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9460 }
9461 }
9462 else
9463#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9464 {
9465 /*
9466 * No debug info, just disassemble the x86 code and then the native code.
9467 *
9468 * First the guest code:
9469 */
9470 for (unsigned i = 0; i < pTb->cRanges; i++)
9471 {
9472 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9473 + (pTb->aRanges[i].idxPhysPage == 0
9474 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9475 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9476 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9477 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9478 unsigned off = pTb->aRanges[i].offOpcodes;
9479 /** @todo this ain't working when crossing pages! */
9480 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9481 while (off < cbOpcodes)
9482 {
9483 uint32_t cbInstr = 1;
9484 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9485 &pTb->pabOpcodes[off], cbOpcodes - off,
9486 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9487 if (RT_SUCCESS(rc))
9488 {
9489 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9490 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9491 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9492 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9493 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9494 GCPhysPc += cbInstr;
9495 off += cbInstr;
9496 }
9497 else
9498 {
9499 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9500 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9501 break;
9502 }
9503 }
9504 }
9505
9506 /*
9507 * Then the native code:
9508 */
9509 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9510 while (offNative < cNative)
9511 {
9512 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9513# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9514 uint32_t cbInstr = sizeof(paNative[0]);
9515 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9516 if (RT_SUCCESS(rc))
9517 {
9518# if defined(RT_ARCH_AMD64)
9519 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9520 {
9521 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9522 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9523 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9524 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9525 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9526 uInfo & 0x8000 ? "recompiled" : "todo");
9527 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9528 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9529 else
9530 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9531 }
9532 else
9533# endif
9534 {
9535# ifdef RT_ARCH_AMD64
9536 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9537 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9538 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9539 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9540# elif defined(RT_ARCH_ARM64)
9541 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9542 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9543 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9544# else
9545# error "Port me"
9546# endif
9547 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9548 }
9549 }
9550 else
9551 {
9552# if defined(RT_ARCH_AMD64)
9553 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9554 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9555# else
9556 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9557# endif
9558 cbInstr = sizeof(paNative[0]);
9559 }
9560 offNative += cbInstr / sizeof(paNative[0]);
9561
9562# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9563 cs_insn *pInstr;
9564 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9565 (uintptr_t)pNativeCur, 1, &pInstr);
9566 if (cInstrs > 0)
9567 {
9568 Assert(cInstrs == 1);
9569# if defined(RT_ARCH_AMD64)
9570 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9571 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9572# else
9573 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9574 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9575# endif
9576 offNative += pInstr->size / sizeof(*pNativeCur);
9577 cs_free(pInstr, cInstrs);
9578 }
9579 else
9580 {
9581# if defined(RT_ARCH_AMD64)
9582 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9583 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9584# else
9585 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9586# endif
9587 offNative++;
9588 }
9589# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9590 }
9591 }
9592
9593#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9594 /* Cleanup. */
9595 cs_close(&hDisasm);
9596#endif
9597}
9598
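/**
 * Illustration: the per-call 7-byte nop markers decoded by
 * iemNativeDisassembleTb() above (those naming a threaded call) are emitted
 * by iemNativeRecompile() below in strict builds, with the payload packed
 * like this:
 *
 * @code
 *      // Low word: threaded call index, bit 15 set when the call was recompiled.
 *      // High word: the IEMTHREADEDFUNCS value identifying the call.
 *      off = iemNativeEmitMarker(pReNative, off,
 *                                RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
 * @endcode
 */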
9599
9600/**
9601 * Recompiles the given threaded TB into a native one.
9602 *
9603 * In case of failure the translation block will be returned as-is.
9604 *
9605 * @returns pTb.
9606 * @param pVCpu The cross context virtual CPU structure of the calling
9607 * thread.
9608 * @param pTb The threaded translation to recompile to native.
9609 */
9610DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9611{
9612 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9613
9614 /*
9615 * The first time thru, we allocate the recompiler state, the other times
9616 * we just need to reset it before using it again.
9617 */
9618 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9619 if (RT_LIKELY(pReNative))
9620 iemNativeReInit(pReNative, pTb);
9621 else
9622 {
9623 pReNative = iemNativeInit(pVCpu, pTb);
9624 AssertReturn(pReNative, pTb);
9625 }
9626
9627#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9628 /*
9629 * First do liveness analysis. This is done backwards.
9630 */
9631 {
9632 uint32_t idxCall = pTb->Thrd.cCalls;
9633 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9634 { /* likely */ }
9635 else
9636 {
9637 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9638 while (idxCall > cAlloc)
9639 cAlloc *= 2;
9640 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9641 AssertReturn(pvNew, pTb);
9642 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9643 pReNative->cLivenessEntriesAlloc = cAlloc;
9644 }
9645 AssertReturn(idxCall > 0, pTb);
9646 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9647
9648 /* The initial (final) entry. */
9649 idxCall--;
9650 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9651
9652 /* Loop backwards thru the calls and fill in the other entries. */
9653 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9654 while (idxCall > 0)
9655 {
9656 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9657 if (pfnLiveness)
9658 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9659 else
9660 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9661 pCallEntry--;
9662 idxCall--;
9663 }
9664
9665# ifdef VBOX_WITH_STATISTICS
9666 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9667 to 'clobbered' rather than 'input'. */
9668 /** @todo */
9669# endif
9670 }
9671#endif
9672
9673 /*
9674 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9675 * for aborting if an error happens.
9676 */
9677 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9678#ifdef LOG_ENABLED
9679 uint32_t const cCallsOrg = cCallsLeft;
9680#endif
9681 uint32_t off = 0;
9682 int rc = VINF_SUCCESS;
9683 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9684 {
9685 /*
9686 * Emit prolog code (fixed).
9687 */
9688 off = iemNativeEmitProlog(pReNative, off);
9689
9690 /*
9691 * Convert the calls to native code.
9692 */
9693#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9694 int32_t iGstInstr = -1;
9695#endif
9696#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9697 uint32_t cThreadedCalls = 0;
9698 uint32_t cRecompiledCalls = 0;
9699#endif
9700#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9701 uint32_t idxCurCall = 0;
9702#endif
9703 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9704 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9705 while (cCallsLeft-- > 0)
9706 {
9707 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9708#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9709 pReNative->idxCurCall = idxCurCall;
9710#endif
9711
9712 /*
9713 * Debug info, assembly markup and statistics.
9714 */
9715#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9716 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9717 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9718#endif
9719#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9720 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9721 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9722 {
9723 if (iGstInstr < (int32_t)pTb->cInstructions)
9724 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9725 else
9726 Assert(iGstInstr == pTb->cInstructions);
9727 iGstInstr = pCallEntry->idxInstr;
9728 }
9729 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9730#endif
9731#if defined(VBOX_STRICT)
9732 off = iemNativeEmitMarker(pReNative, off,
9733 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9734#endif
9735#if defined(VBOX_STRICT)
9736 iemNativeRegAssertSanity(pReNative);
9737#endif
9738#ifdef VBOX_WITH_STATISTICS
9739 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9740#endif
9741
9742 /*
9743 * Actual work.
9744 */
9745 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9746 pfnRecom ? "(recompiled)" : "(todo)"));
9747 if (pfnRecom) /** @todo stats on this. */
9748 {
9749 off = pfnRecom(pReNative, off, pCallEntry);
9750 STAM_REL_STATS({cRecompiledCalls++;});
9751 }
9752 else
9753 {
9754 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9755 STAM_REL_STATS({cThreadedCalls++;});
9756 }
9757 Assert(off <= pReNative->cInstrBufAlloc);
9758 Assert(pReNative->cCondDepth == 0);
9759
#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
            if (LogIs2Enabled())
            {
                PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
# ifndef IEMLIVENESS_EXTENDED_LAYOUT
                static const char s_achState[] = "CUXI";
# else
                static const char s_achState[] = "UxRrWwMmCcQqKkNn";
# endif

                char szGpr[17];
                for (unsigned i = 0; i < 16; i++)
                    szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
                szGpr[16] = '\0';

                char szSegBase[X86_SREG_COUNT + 1];
                char szSegLimit[X86_SREG_COUNT + 1];
                char szSegAttrib[X86_SREG_COUNT + 1];
                char szSegSel[X86_SREG_COUNT + 1];
                for (unsigned i = 0; i < X86_SREG_COUNT; i++)
                {
                    szSegBase[i]   = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
                    szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
                    szSegLimit[i]  = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
                    szSegSel[i]    = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
                }
                szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
                    = szSegSel[X86_SREG_COUNT] = '\0';

                char szEFlags[8];
                for (unsigned i = 0; i < 7; i++)
                    szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
                szEFlags[7] = '\0';
                Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
                      szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
            }
#endif
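            /*
             * Each character position in the strings logged above maps one
             * guest register to its liveness state at this call, using the
             * single-letter codes from s_achState: the 16 GPRs, the base /
             * attribute / limit / selector fields of the six segment
             * registers, and seven EFLAGS slots.
             */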

            /*
             * Advance.
             */
            pCallEntry++;
#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
            idxCurCall++;
#endif
        }

        STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
        STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
        if (!cThreadedCalls)
            STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);

        /*
         * Emit the epilog code.
         */
        uint32_t idxReturnLabel;
        off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);

        /*
         * Generate special jump labels.
         */
        if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
            off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
        if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
            off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
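        /*
         * The ReturnBreak and ReturnWithFlags tail code is only emitted when
         * something in the TB actually requested the corresponding label (as
         * recorded in bmLabelTypes), and both sequences are handed the common
         * return label produced by the epilog above.
         */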

        /*
         * Generate simple TB tail labels that just call a helper with a pVCpu
         * argument and either return or longjmp/throw a non-zero status.
         *
         * The array entries must be ordered by enmLabel value so we can index
         * using the fTailLabels bit numbers.
         */
        typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
        static struct
        {
            IEMNATIVELABELTYPE              enmLabel;
            PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
        } const g_aSimpleTailLabels[] =
        {
            { kIemNativeLabelType_Invalid,              NULL },
            { kIemNativeLabelType_RaiseDe,              iemNativeHlpExecRaiseDe },
            { kIemNativeLabelType_RaiseUd,              iemNativeHlpExecRaiseUd },
            { kIemNativeLabelType_RaiseSseRelated,      iemNativeHlpExecRaiseSseRelated },
            { kIemNativeLabelType_RaiseAvxRelated,      iemNativeHlpExecRaiseAvxRelated },
            { kIemNativeLabelType_RaiseNm,              iemNativeHlpExecRaiseNm },
            { kIemNativeLabelType_RaiseGp0,             iemNativeHlpExecRaiseGp0 },
            { kIemNativeLabelType_RaiseMf,              iemNativeHlpExecRaiseMf },
            { kIemNativeLabelType_RaiseXf,              iemNativeHlpExecRaiseXf },
            { kIemNativeLabelType_ObsoleteTb,           iemNativeHlpObsoleteTb },
            { kIemNativeLabelType_NeedCsLimChecking,    iemNativeHlpNeedCsLimChecking },
            { kIemNativeLabelType_CheckBranchMiss,      iemNativeHlpCheckBranchMiss },
        };
        AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
        AssertCompile(kIemNativeLabelType_Invalid == 0);
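        /*
         * Collect the simple tail labels that were actually requested:
         * RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U is the mask of
         * bits 1 thru LastSimple; subtracting 2 rather than 1 keeps bit 0
         * (the Invalid type) clear.  With the twelve-entry table above that
         * works out to 0xffe.
         */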
        uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
        if (fTailLabels)
        {
            do
            {
                IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
                fTailLabels &= ~RT_BIT_64(enmLabel);
                Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);

                uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
                Assert(idxLabel != UINT32_MAX);
                if (idxLabel != UINT32_MAX)
                {
                    iemNativeLabelDefine(pReNative, idxLabel, off);

                    /* int pfnCallback(PVMCPUCC pVCpu) */
                    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
                    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);

                    /* jump back to the return sequence. */
                    off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
                }

            } while (fTailLabels);
        }
    }
    IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
    {
        Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
        return pTb;
    }
    IEMNATIVE_CATCH_LONGJMP_END(pReNative);
    Assert(off <= pReNative->cInstrBufAlloc);

    /*
     * Make sure all labels have been defined.
     */
    PIEMNATIVELABEL const paLabels = pReNative->paLabels;
#ifdef VBOX_STRICT
    uint32_t const cLabels = pReNative->cLabels;
    for (uint32_t i = 0; i < cLabels; i++)
        AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
#endif

    /*
     * Allocate executable memory, copy over the code we've generated.
     */
    PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
    if (pTbAllocator->pDelayedFreeHead)
        iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);

    PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
    AssertReturn(paFinalInstrBuf, pTb);
    memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
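    /* Note that off counts native instruction units rather than bytes: an
       IEMNATIVEINSTR is a single byte on AMD64/X86 but a 32-bit word on ARM64,
       hence the sizeof() scaling of the allocation and copy sizes. */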

    /*
     * Apply fixups.
     */
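    /*
     * Each fixup patches an already emitted instruction at paFixups[i].off so
     * that it references the now-known offset of its label.  On AMD64/X86 this
     * is a 32-bit displacement (offAddend adjusting for the point the CPU
     * takes it to be relative to); on ARM64 the displacement is in 4-byte
     * instruction units and lands in the imm26 field of B/BL, the imm19 field
     * of B.cond/CBZ/CBNZ or the imm14 field of TBZ/TBNZ, which is what the
     * RelImm26At0, RelImm19At5 and RelImm14At5 cases below encode.
     */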
    PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
    uint32_t const        cFixups  = pReNative->cFixups;
    for (uint32_t i = 0; i < cFixups; i++)
    {
        Assert(paFixups[i].off < off);
        Assert(paFixups[i].idxLabel < cLabels);
        AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
                  ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
                   paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
        RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
        switch (paFixups[i].enmType)
        {
#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
            case kIemNativeFixupType_Rel32:
                Assert(paFixups[i].off + 4 <= off);
                *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
                continue;

#elif defined(RT_ARCH_ARM64)
            case kIemNativeFixupType_RelImm26At0:
            {
                Assert(paFixups[i].off < off);
                int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
                Assert(offDisp >= -262144 && offDisp < 262144);
                *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
                continue;
            }

            case kIemNativeFixupType_RelImm19At5:
            {
                Assert(paFixups[i].off < off);
                int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
                Assert(offDisp >= -262144 && offDisp < 262144);
                *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
                continue;
            }

            case kIemNativeFixupType_RelImm14At5:
            {
                Assert(paFixups[i].off < off);
                int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
                Assert(offDisp >= -8192 && offDisp < 8192);
                *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
                continue;
            }

#endif
            case kIemNativeFixupType_Invalid:
            case kIemNativeFixupType_End:
                break;
        }
        AssertFailed();
    }

    iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
    STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
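    /* iemExecMemAllocatorReadyForUse() is expected to perform whatever the
       host requires before the freshly written code may be executed, e.g.
       instruction cache maintenance on ARM64 hosts. */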

    /*
     * Convert the translation block.
     */
    RTMemFree(pTb->Thrd.paCalls);
    pTb->Native.paInstructions = paFinalInstrBuf;
    pTb->Native.cInstructions  = off;
    pTb->fFlags                = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
                                        RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
#endif
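    /* From here on the TB is a native one: the threaded call table has been
       freed, the type in fFlags is IEMTB_F_TYPE_NATIVE, and the debug info
       (when enabled) has been duplicated on a best-effort basis. */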

    Assert(pTbAllocator->cThreadedTbs > 0);
    pTbAllocator->cThreadedTbs -= 1;
    pTbAllocator->cNativeTbs   += 1;
    Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);

#ifdef LOG_ENABLED
    /*
     * Disassemble to the log if enabled.
     */
    if (LogIs3Enabled())
    {
        Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
        iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
        RTLogFlush(NULL);
# endif
    }
#endif
    /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/

    STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
    return pTb;
}
