VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103942

Last change on this file since 103942 was 103942, checked in by vboxsync, 9 months ago

VMM/IEM: Implement memory stores from SIMD registers and implement native emitters for IEM_MC_STORE_MEM_U128_ALIGN_SSE()/IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(), bugref:10614

1/* $Id: IEMAllN8veRecompiler.cpp 103942 2024-03-20 10:22:38Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation restricts page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
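/* Worked example of the unit math (illustration only, mirroring the formulas
 * used by the allocator below): a 300 byte request is rounded up to whole
 * 128 byte units, i.e. cReqUnits = (300 + 128 - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 3
 * (384 bytes), and a unit index found in the allocation bitmap is converted
 * back to a byte offset within the chunk by shifting it left by the same factor. */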
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Critical section protecting the GDB JIT descriptor list. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. For
338 * simplicity, the bitmaps for all chunks are therefore allocated as one
339 * contiguous block. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * requested memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
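/* Scan illustration (assumes the 128 byte units above; bit 0 is the first unit
 * of the scanned range, 1 = allocated, 0 = free):
 *
 *      pbmAlloc: 1111 0110 0011 ...
 *
 * For a 3 unit request ASMBitFirstClear finds bit 4, but bit 5 is set so the
 * run is too short; ASMBitNextClear resumes the search and finds bits 7 thru 9
 * clear, so those units get marked allocated and the address returned is
 * pvChunk + ((idxFirst + 7) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT). */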
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * For the alternative allocator we simply align the size up to a whole number of units.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
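/* Rough sketch of the intended allocate/write/execute protocol (illustration
 * only, not lifted from the actual callers):
 *
 *      void *pv = iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *      if (pv)
 *      {
 *          memcpy(pv, pabCode, cbCode);                          // still writable (RW on darwin)
 *          iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);    // flip to RX + icache flush where needed
 *          // ... execute the translation block ...
 *          iemExecMemAllocatorFree(pVCpu, pv, cbCode);           // when the TB is retired
 *      }
 */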
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here, both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
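/* A single RUNTIME_FUNCTION entry spanning the whole chunk is registered
 * above, so every native TB later placed in the chunk is covered by the same
 * unwind program without any per-TB registration. */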
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
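/* Example: the data alignment factor -8 used below encodes as the single byte
 * 0x78 ((-8 & 0x3f) | 0x40), i.e. continuation bit 7 clear and sign bit 6 set. */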
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
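/* Example: iemDwarfPutUleb128(Ptr, 0x90) emits the two bytes 0x90 0x01 (low
 * seven bits with the continuation bit set, then the remaining bit), while
 * values below 0x80, like the code alignment factors used here, fit in a
 * single byte. */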
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
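/* Example (AMD64, with the data alignment factor of -8 emitted below):
 * iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) produces 0x86 0x02, i.e.
 * DW_CFA_offset for register 6 (RBP) with ULEB128 operand 2, which reads as
 * "RBP was saved at CFA + 2 * -8 = CFA - 16". */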
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
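 /* At this point abEhFrame holds, back to back: the CIE with the common
 * register rules above, one FDE covering pvChunk thru pvChunk + cbChunk, and
 * a zero-length terminator entry; that is all the unwinders registered below
 * need for the whole chunk. */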
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symtab */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we've room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on a 64 byte boundary, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
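 /* Example of the sizing above (illustrative figures): cbMax = 64 MiB with
 * cbChunk = 0 gives cbChunk = 64 MiB / 4 = 16 MiB (already a power of two)
 * and cMaxChunks = 4; with cbMax = 512 MiB the chunk size is capped at
 * 64 MiB, giving cMaxChunks = 8. */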
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
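    /* The allocation bitmap has one bit per 2^IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT byte
       allocation unit; the '+ 3' converts bits to bytes, and the '+ 6' used for
       cBitmapElementsPerChunk further down converts bits to 64-bit bitmap words. */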
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
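/* Illustrative usage sketch (hypothetical sizes, not defaults taken from the code above):
   a caller on the owning EMT could set up a 64 MB allocator with 16 MB allocated up
   front and the default chunk size roughly like this:

       int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0);
       AssertLogRelRCReturn(rc, rc);
*/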
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#NM.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDeviceNotAvailableJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when it wants to raise a \#GP(0).
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1657{
1658 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1659#ifndef _MSC_VER
1660 return VINF_IEM_RAISED_XCPT; /* not reached */
1661#endif
1662}
1663
1664
1665/**
1666 * Used by TB code when it wants to raise a \#MF.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1669{
1670 iemRaiseMathFaultJmp(pVCpu);
1671#ifndef _MSC_VER
1672 return VINF_IEM_RAISED_XCPT; /* not reached */
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code when it wants to raise a \#XF.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1681{
1682 iemRaiseSimdFpExceptionJmp(pVCpu);
1683#ifndef _MSC_VER
1684 return VINF_IEM_RAISED_XCPT; /* not reached */
1685#endif
1686}
1687
1688
1689/**
1690 * Used by TB code when detecting opcode changes.
1691 * @see iemThreadeFuncWorkerObsoleteTb
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1694{
1695    /* We set fSafeToFree to false because we're being called in the context
1696       of a TB callback function, which for native TBs means we cannot release
1697       the executable memory until we've returned our way back to iemTbExec, as
1698       that return path goes via the native code generated for the TB. */
1699 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1700 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1701 return VINF_IEM_REEXEC_BREAK;
1702}
1703
1704
1705/**
1706 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1707 */
1708IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1709{
1710 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1711 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1712 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1713 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1714 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1715 return VINF_IEM_REEXEC_BREAK;
1716}
1717
1718
1719/**
1720 * Used by TB code when we missed a PC check after a branch.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1723{
1724 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1725 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1726 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1727 pVCpu->iem.s.pbInstrBuf));
1728 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1729 return VINF_IEM_REEXEC_BREAK;
1730}
1731
1732
1733
1734/*********************************************************************************************************************************
1735* Helpers: Segmented memory fetches and stores. *
1736*********************************************************************************************************************************/
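/* Note on the IEMNATIVE_WITH_TLB_LOOKUP_FETCH / _STORE pattern used throughout this
   section: when those defines are active the recompiled TB is expected to perform the
   TLB lookup itself and only call these helpers on the fallback path, hence the
   *SafeJmp workers; otherwise the ordinary *Jmp workers handle the access end to end. */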
1737
1738/**
1739 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1753 * to 16 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
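/* Note: the cast chain above first sign-extends the byte to 16 bits and then
   zero-extends the result to 64 bits, e.g. 0x80 -> 0xFF80 -> 0x000000000000FF80,
   so the upper 48 bits of the return value are always zero. */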
1763
1764
1765/**
1766 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1767 * to 32 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778/**
1779 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
1790
1791
1792/**
1793 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1807 * to 32 bits.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1821 * to 64 bits.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1826 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1827#else
1828 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1839 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1840#else
1841 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1848 * to 64 bits.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1853 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1854#else
1855 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1866 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1867#else
1868 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1869#endif
1870}
1871
1872
1873#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1874/**
1875 * Used by TB code to load 128-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1880 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1881#else
1882 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1883#endif
1884}
1885
1886
1887/**
1888 * Used by TB code to load 128-bit data w/ segmentation, with SSE alignment checking.
1889 */
1890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1891{
1892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1893 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1894#else
1895 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1896#endif
1897}
1898
1899
1900/**
1901 * Used by TB code to load 128-bit data w/ segmentation, skipping the alignment check.
1902 */
1903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1904{
1905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1906 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1907#else
1908 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1909#endif
1910}
1911
1912
1913/**
1914 * Used by TB code to load 256-bit data w/ segmentation.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1917{
1918#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1919 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1920#else
1921 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1922#endif
1923}
1924#endif
1925
1926
1927/**
1928 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1929 */
1930IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1931{
1932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1933 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1934#else
1935 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1936#endif
1937}
1938
1939
1940/**
1941 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1944{
1945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1946 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1947#else
1948 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1949#endif
1950}
1951
1952
1953/**
1954 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1955 */
1956IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1957{
1958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1959 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1960#else
1961 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1962#endif
1963}
1964
1965
1966/**
1967 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1968 */
1969IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1970{
1971#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1972 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1973#else
1974 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1975#endif
1976}
1977
1978
1979#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1980/**
1981 * Used by TB code to store unsigned 128-bit data w/ segmentation, with SSE alignment checking.
1982 */
1983IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
1984{
1985#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1986 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
1987#else
1988 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
1989#endif
1990}
1991
1992
1993/**
1994 * Used by TB code to store unsigned 128-bit data w/ segmentation, skipping the alignment check.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1999 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2000#else
2001 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2002#endif
2003}
2004#endif
2005
2006
2007
2008/**
2009 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
2010 */
2011IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2012{
2013#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2014 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2015#else
2016 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2017#endif
2018}
2019
2020
2021/**
2022 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2025{
2026#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2027 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2028#else
2029 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2030#endif
2031}
2032
2033
2034/**
2035 * Used by TB code to store a 32-bit selector value onto a generic stack.
2036 *
2037 * Intel CPUs don't write a whole dword here, thus the special function.
2038 */
2039IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2040{
2041#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2042 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2043#else
2044 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2045#endif
2046}
2047
2048
2049/**
2050 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
2051 */
2052IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2053{
2054#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2055 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2056#else
2057 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2058#endif
2059}
2060
2061
2062/**
2063 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2064 */
2065IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2066{
2067#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2068 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2069#else
2070 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2071#endif
2072}
2073
2074
2075/**
2076 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2081 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2082#else
2083 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2092{
2093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2094 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2095#else
2096 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2097#endif
2098}
2099
2100
2101
2102/*********************************************************************************************************************************
2103* Helpers: Flat memory fetches and stores. *
2104*********************************************************************************************************************************/
2105
2106/**
2107 * Used by TB code to load unsigned 8-bit data w/ flat address.
2108 * @note Zero extending the value to 64-bit to simplify assembly.
2109 */
2110IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2111{
2112#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2113 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2114#else
2115 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2116#endif
2117}
2118
2119
2120/**
2121 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2122 * to 16 bits.
2123 * @note Zero extending the value to 64-bit to simplify assembly.
2124 */
2125IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2126{
2127#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2128 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2129#else
2130 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2131#endif
2132}
2133
2134
2135/**
2136 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2137 * to 32 bits.
2138 * @note Zero extending the value to 64-bit to simplify assembly.
2139 */
2140IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2141{
2142#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2143 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2144#else
2145 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2146#endif
2147}
2148
2149
2150/**
2151 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2152 * to 64 bits.
2153 */
2154IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2155{
2156#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2157 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2158#else
2159 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2160#endif
2161}
2162
2163
2164/**
2165 * Used by TB code to load unsigned 16-bit data w/ flat address.
2166 * @note Zero extending the value to 64-bit to simplify assembly.
2167 */
2168IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2169{
2170#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2171 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2172#else
2173 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2174#endif
2175}
2176
2177
2178/**
2179 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2180 * to 32 bits.
2181 * @note Zero extending the value to 64-bit to simplify assembly.
2182 */
2183IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2184{
2185#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2186 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2187#else
2188 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2189#endif
2190}
2191
2192
2193/**
2194 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2195 * to 64 bits.
2196 * @note Zero extending the value to 64-bit to simplify assembly.
2197 */
2198IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2199{
2200#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2201 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2202#else
2203 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2204#endif
2205}
2206
2207
2208/**
2209 * Used by TB code to load unsigned 32-bit data w/ flat address.
2210 * @note Zero extending the value to 64-bit to simplify assembly.
2211 */
2212IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2213{
2214#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2215 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2216#else
2217 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2218#endif
2219}
2220
2221
2222/**
2223 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2224 * to 64 bits.
2225 * @note Zero extending the value to 64-bit to simplify assembly.
2226 */
2227IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2228{
2229#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2230 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2231#else
2232 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2233#endif
2234}
2235
2236
2237/**
2238 * Used by TB code to load unsigned 64-bit data w/ flat address.
2239 */
2240IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2241{
2242#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2243 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2244#else
2245 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2246#endif
2247}
2248
2249
2250#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2251/**
2252 * Used by TB code to load unsigned 128-bit data w/ flat address.
2253 */
2254IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2255{
2256#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2257 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2258#else
2259 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2260#endif
2261}
2262
2263
2264/**
2265 * Used by TB code to load unsigned 128-bit data w/ flat address, with SSE alignment checking.
2266 */
2267IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2268{
2269#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2270 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2271#else
2272 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2273#endif
2274}
2275
2276
2277/**
2278 * Used by TB code to load unsigned 128-bit data w/ flat address, skipping the alignment check.
2279 */
2280IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2281{
2282#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2283 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2284#else
2285 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2286#endif
2287}
2288
2289
2290/**
2291 * Used by TB code to load unsigned 256-bit data w/ flat address.
2292 */
2293IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2294{
2295#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2296 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2297#else
2298 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2299#endif
2300}
2301#endif
2302
2303
2304/**
2305 * Used by TB code to store unsigned 8-bit data w/ flat address.
2306 */
2307IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2308{
2309#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2310 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2311#else
2312 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2313#endif
2314}
2315
2316
2317/**
2318 * Used by TB code to store unsigned 16-bit data w/ flat address.
2319 */
2320IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2321{
2322#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2323 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2324#else
2325 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2326#endif
2327}
2328
2329
2330/**
2331 * Used by TB code to store unsigned 32-bit data w/ flat address.
2332 */
2333IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2336 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2337#else
2338 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2339#endif
2340}
2341
2342
2343/**
2344 * Used by TB code to store unsigned 64-bit data w/ flat address.
2345 */
2346IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2347{
2348#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2349 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2350#else
2351 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2352#endif
2353}
2354
2355
2356#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2357/**
2358 * Used by TB code to store unsigned 128-bit data w/ flat address, with SSE alignment checking.
2359 */
2360IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2361{
2362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2363 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2364#else
2365 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2366#endif
2367}
2368
2369
2370/**
2371 * Used by TB code to store unsigned 128-bit data w/ flat address, skipping the alignment check.
2372 */
2373IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2374{
2375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2376 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2377#else
2378 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2379#endif
2380}
2381#endif
2382
2383
2384
2385/**
2386 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2387 */
2388IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2389{
2390#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2391 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2392#else
2393 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2394#endif
2395}
2396
2397
2398/**
2399 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2400 */
2401IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2402{
2403#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2404 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2405#else
2406 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2407#endif
2408}
2409
2410
2411/**
2412 * Used by TB code to store a segment selector value onto a flat stack.
2413 *
2414 * Intel CPUs don't write a whole dword here, thus the special function.
2415 */
2416IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2417{
2418#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2419 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2420#else
2421 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2422#endif
2423}
2424
2425
2426/**
2427 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2428 */
2429IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2432 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2433#else
2434 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2445 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2446#else
2447 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2454 */
2455IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2456{
2457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2458 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2459#else
2460 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2461#endif
2462}
2463
2464
2465/**
2466 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2467 */
2468IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2469{
2470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2471 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2472#else
2473 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2474#endif
2475}
2476
2477
2478
2479/*********************************************************************************************************************************
2480* Helpers: Segmented memory mapping. *
2481*********************************************************************************************************************************/
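/* Note: each mapping helper returns a pointer to the guest data and records the
   bookkeeping needed to undo the mapping in *pbUnmapInfo; that byte is later handed
   to the iemNativeHlpMemCommitAndUnmap* helpers further down. */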
2482
2483/**
2484 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2485 * segmentation.
2486 */
2487IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2488 RTGCPTR GCPtrMem, uint8_t iSegReg))
2489{
2490#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2491 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2492#else
2493 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2494#endif
2495}
2496
2497
2498/**
2499 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2500 */
2501IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2502 RTGCPTR GCPtrMem, uint8_t iSegReg))
2503{
2504#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2505 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2506#else
2507 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2508#endif
2509}
2510
2511
2512/**
2513 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2514 */
2515IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2516 RTGCPTR GCPtrMem, uint8_t iSegReg))
2517{
2518#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2519 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2520#else
2521 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2522#endif
2523}
2524
2525
2526/**
2527 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2528 */
2529IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2530 RTGCPTR GCPtrMem, uint8_t iSegReg))
2531{
2532#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2533 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2534#else
2535 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2536#endif
2537}
2538
2539
2540/**
2541 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2542 * segmentation.
2543 */
2544IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2545 RTGCPTR GCPtrMem, uint8_t iSegReg))
2546{
2547#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2548 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2549#else
2550 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2551#endif
2552}
2553
2554
2555/**
2556 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2557 */
2558IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2559 RTGCPTR GCPtrMem, uint8_t iSegReg))
2560{
2561#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2562 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2563#else
2564 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2565#endif
2566}
2567
2568
2569/**
2570 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2571 */
2572IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2573 RTGCPTR GCPtrMem, uint8_t iSegReg))
2574{
2575#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2576 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2577#else
2578 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2579#endif
2580}
2581
2582
2583/**
2584 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2585 */
2586IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2587 RTGCPTR GCPtrMem, uint8_t iSegReg))
2588{
2589#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2590 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2591#else
2592 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2593#endif
2594}
2595
2596
2597/**
2598 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2599 * segmentation.
2600 */
2601IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2602 RTGCPTR GCPtrMem, uint8_t iSegReg))
2603{
2604#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2605 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2606#else
2607 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2608#endif
2609}
2610
2611
2612/**
2613 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2614 */
2615IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2616 RTGCPTR GCPtrMem, uint8_t iSegReg))
2617{
2618#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2619 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2620#else
2621 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2622#endif
2623}
2624
2625
2626/**
2627 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2628 */
2629IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2630 RTGCPTR GCPtrMem, uint8_t iSegReg))
2631{
2632#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2633 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2634#else
2635 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2636#endif
2637}
2638
2639
2640/**
2641 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2642 */
2643IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2644 RTGCPTR GCPtrMem, uint8_t iSegReg))
2645{
2646#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2647 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2648#else
2649 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2650#endif
2651}
2652
2653
2654/**
2655 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2656 * segmentation.
2657 */
2658IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2659 RTGCPTR GCPtrMem, uint8_t iSegReg))
2660{
2661#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2662 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2663#else
2664 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2665#endif
2666}
2667
2668
2669/**
2670 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2671 */
2672IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2673 RTGCPTR GCPtrMem, uint8_t iSegReg))
2674{
2675#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2676 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2677#else
2678 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2679#endif
2680}
2681
2682
2683/**
2684 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2685 */
2686IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2687 RTGCPTR GCPtrMem, uint8_t iSegReg))
2688{
2689#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2690 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2691#else
2692 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2693#endif
2694}
2695
2696
2697/**
2698 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2699 */
2700IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2701 RTGCPTR GCPtrMem, uint8_t iSegReg))
2702{
2703#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2704 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2705#else
2706 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2707#endif
2708}
2709
2710
2711/**
2712 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2713 */
2714IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2715 RTGCPTR GCPtrMem, uint8_t iSegReg))
2716{
2717#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2718 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2719#else
2720 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2721#endif
2722}
2723
2724
2725/**
2726 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2727 */
2728IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2729 RTGCPTR GCPtrMem, uint8_t iSegReg))
2730{
2731#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2732 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2733#else
2734 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2735#endif
2736}
2737
2738
2739/**
2740 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2741 * segmentation.
2742 */
2743IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2744 RTGCPTR GCPtrMem, uint8_t iSegReg))
2745{
2746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2747 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2748#else
2749 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2750#endif
2751}
2752
2753
2754/**
2755 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2756 */
2757IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2758 RTGCPTR GCPtrMem, uint8_t iSegReg))
2759{
2760#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2761 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2762#else
2763 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2764#endif
2765}
2766
2767
2768/**
2769 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2770 */
2771IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2772 RTGCPTR GCPtrMem, uint8_t iSegReg))
2773{
2774#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2775 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2776#else
2777 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2778#endif
2779}
2780
2781
2782/**
2783 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2784 */
2785IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2786 RTGCPTR GCPtrMem, uint8_t iSegReg))
2787{
2788#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2789 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2790#else
2791 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2792#endif
2793}
2794
2795
2796/*********************************************************************************************************************************
2797* Helpers: Flat memory mapping. *
2798*********************************************************************************************************************************/
2799
2800/**
2801 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2802 * address.
2803 */
2804IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2805{
2806#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2807 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2808#else
2809 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2810#endif
2811}
2812
2813
2814/**
2815 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2816 */
2817IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2818{
2819#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2820 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2821#else
2822 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2823#endif
2824}
2825
2826
2827/**
2828 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2829 */
2830IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2831{
2832#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2833 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2834#else
2835 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2836#endif
2837}
2838
2839
2840/**
2841 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2842 */
2843IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2844{
2845#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2846 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2847#else
2848 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2849#endif
2850}
2851
2852
2853/**
2854 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2855 * address.
2856 */
2857IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2858{
2859#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2860 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2861#else
2862 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2863#endif
2864}
2865
2866
2867/**
2868 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2869 */
2870IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2871{
2872#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2873 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2874#else
2875 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2876#endif
2877}
2878
2879
2880/**
2881 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2882 */
2883IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2884{
2885#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2886 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2887#else
2888 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2889#endif
2890}
2891
2892
2893/**
2894 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2895 */
2896IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2897{
2898#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2899 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2900#else
2901 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2902#endif
2903}
2904
2905
2906/**
2907 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2908 * address.
2909 */
2910IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2911{
2912#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2913 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2914#else
2915 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2916#endif
2917}
2918
2919
2920/**
2921 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2922 */
2923IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2924{
2925#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2926 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2927#else
2928 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2929#endif
2930}
2931
2932
2933/**
2934 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2935 */
2936IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2937{
2938#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2939 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2940#else
2941 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2942#endif
2943}
2944
2945
2946/**
2947 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2948 */
2949IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2950{
2951#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2952 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2953#else
2954 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2955#endif
2956}
2957
2958
2959/**
2960 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2961 * address.
2962 */
2963IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2964{
2965#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2966 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2967#else
2968 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2969#endif
2970}
2971
2972
2973/**
2974 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2975 */
2976IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2977{
2978#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2979 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2980#else
2981 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2982#endif
2983}
2984
2985
2986/**
2987 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2988 */
2989IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2990{
2991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2992 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2993#else
2994 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2995#endif
2996}
2997
2998
2999/**
3000 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3001 */
3002IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3003{
3004#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3005 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3006#else
3007 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3008#endif
3009}
3010
3011
3012/**
3013 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3014 */
3015IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3016{
3017#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3018 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3019#else
3020 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3021#endif
3022}
3023
3024
3025/**
3026 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3027 */
3028IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3029{
3030#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3031 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3032#else
3033 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3034#endif
3035}
3036
3037
3038/**
3039 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3040 * address.
3041 */
3042IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3043{
3044#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3045 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3046#else
3047 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3048#endif
3049}
3050
3051
3052/**
3053 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3054 */
3055IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3056{
3057#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3058 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3059#else
3060 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3061#endif
3062}
3063
3064
3065/**
3066 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3067 */
3068IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3069{
3070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3071 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3072#else
3073 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3074#endif
3075}
3076
3077
3078/**
3079 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3080 */
3081IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3082{
3083#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3084 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3085#else
3086 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3087#endif
3088}
3089
3090
3091/*********************************************************************************************************************************
3092* Helpers: Commit, rollback & unmap *
3093*********************************************************************************************************************************/
3094
3095/**
3096 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3097 */
3098IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3099{
3100 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3101}
3102
3103
3104/**
3105 * Used by TB code to commit and unmap a read-write memory mapping.
3106 */
3107IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3108{
3109 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3110}
3111
3112
3113/**
3114 * Used by TB code to commit and unmap a write-only memory mapping.
3115 */
3116IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3117{
3118 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3119}
3120
3121
3122/**
3123 * Used by TB code to commit and unmap a read-only memory mapping.
3124 */
3125IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3126{
3127 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3128}
3129
3130
3131/**
3132 * Reinitializes the native recompiler state.
3133 *
3134 * Called before starting a new recompile job.
3135 */
3136static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3137{
3138 pReNative->cLabels = 0;
3139 pReNative->bmLabelTypes = 0;
3140 pReNative->cFixups = 0;
3141#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3142 pReNative->pDbgInfo->cEntries = 0;
3143#endif
3144 pReNative->pTbOrg = pTb;
3145 pReNative->cCondDepth = 0;
3146 pReNative->uCondSeqNo = 0;
3147 pReNative->uCheckIrqSeqNo = 0;
3148 pReNative->uTlbSeqNo = 0;
3149
3150#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3151 pReNative->Core.offPc = 0;
3152 pReNative->Core.cInstrPcUpdateSkipped = 0;
3153#endif
3154#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3155 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3156#endif
3157 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3158#if IEMNATIVE_HST_GREG_COUNT < 32
3159 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3160#endif
3161 ;
3162 pReNative->Core.bmHstRegsWithGstShadow = 0;
3163 pReNative->Core.bmGstRegShadows = 0;
3164 pReNative->Core.bmVars = 0;
3165 pReNative->Core.bmStack = 0;
3166 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3167 pReNative->Core.u64ArgVars = UINT64_MAX;
3168
3169 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 16);
3170 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3171 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3172 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3173 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3174 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3175 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3176 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3177 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3178 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3179 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3180 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3181 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3182 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3183 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3184 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3185 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3186
3187 /* Full host register reinit: */
3188 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3189 {
3190 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3191 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3192 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3193 }
3194
3195 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3196 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3197#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3198 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3199#endif
3200#ifdef IEMNATIVE_REG_FIXED_TMP0
3201 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3202#endif
3203#ifdef IEMNATIVE_REG_FIXED_TMP1
3204 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3205#endif
3206#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3207 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3208#endif
3209 );
3210 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3211 {
3212 fRegs &= ~RT_BIT_32(idxReg);
3213 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3214 }
3215
3216 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3217#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3218 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3219#endif
3220#ifdef IEMNATIVE_REG_FIXED_TMP0
3221 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3222#endif
3223#ifdef IEMNATIVE_REG_FIXED_TMP1
3224 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3225#endif
3226#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3227 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3228#endif
3229
3230#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3231# ifdef RT_ARCH_ARM64
3232 /*
3233     * ARM64 only has 32 128-bit registers. In order to support emulating 256-bit registers we statically pair
3234     * two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3235     * We always pair v0 with v1, v2 with v3, etc., so we mark the higher (odd) register as fixed here during init
3236     * and the register allocator assumes that it is always free when the lower (even) one is picked.
3237 */
3238 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3239# else
3240 uint32_t const fFixedAdditional = 0;
3241# endif
3242
3243 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3244 | fFixedAdditional
3245# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3246 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3247# endif
3248 ;
3249 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3250 pReNative->Core.bmGstSimdRegShadows = 0;
3251 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3252 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3253
3254 /* Full host register reinit: */
3255 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3256 {
3257 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3258 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3259 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3260 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3261 }
3262
3263 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3264 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3265 {
3266 fRegs &= ~RT_BIT_32(idxReg);
3267 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3268 }
3269
3270#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3271 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3272#endif
3273
3274#endif
3275
3276 return pReNative;
3277}
3278
3279
3280/**
3281 * Allocates and initializes the native recompiler state.
3282 *
3283 * This is called the first time an EMT wants to recompile something.
3284 *
3285 * @returns Pointer to the new recompiler state.
3286 * @param pVCpu The cross context virtual CPU structure of the calling
3287 * thread.
3288 * @param pTb The TB that's about to be recompiled.
3289 * @thread EMT(pVCpu)
3290 */
3291static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3292{
3293 VMCPU_ASSERT_EMT(pVCpu);
3294
3295 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3296 AssertReturn(pReNative, NULL);
3297
3298 /*
3299 * Try allocate all the buffers and stuff we need.
3300 */
3301 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3302 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3303 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3304#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3305 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3306#endif
3307 if (RT_LIKELY( pReNative->pInstrBuf
3308 && pReNative->paLabels
3309 && pReNative->paFixups)
3310#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3311 && pReNative->pDbgInfo
3312#endif
3313 )
3314 {
3315 /*
3316 * Set the buffer & array sizes on success.
3317 */
3318 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3319 pReNative->cLabelsAlloc = _8K;
3320 pReNative->cFixupsAlloc = _16K;
3321#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3322 pReNative->cDbgInfoAlloc = _16K;
3323#endif
3324
3325 /* Other constant stuff: */
3326 pReNative->pVCpu = pVCpu;
3327
3328 /*
3329 * Done, just need to save it and reinit it.
3330 */
3331 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3332 return iemNativeReInit(pReNative, pTb);
3333 }
3334
3335 /*
3336 * Failed. Cleanup and return.
3337 */
3338 AssertFailed();
3339 RTMemFree(pReNative->pInstrBuf);
3340 RTMemFree(pReNative->paLabels);
3341 RTMemFree(pReNative->paFixups);
3342#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3343 RTMemFree(pReNative->pDbgInfo);
3344#endif
3345 RTMemFree(pReNative);
3346 return NULL;
3347}
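
/*
 * Usage note (illustrative sketch only): the expected calling pattern is to
 * allocate the state lazily on the first recompilation for an EMT and merely
 * reinitialize it for subsequent TBs.  The exact call site lives elsewhere in
 * this file and may differ in detail:
 *
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      if (RT_LIKELY(pReNative))
 *          pReNative = iemNativeReInit(pReNative, pTb);
 *      else
 *      {
 *          pReNative = iemNativeInit(pVCpu, pTb);  // allocates buffers + reinits
 *          AssertReturn(pReNative, pTb);           // illustrative error handling
 *      }
 */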
3348
3349
3350/**
3351 * Creates a label
3352 *
3353 * If the label does not yet have a defined position,
3354 * call iemNativeLabelDefine() later to set it.
3355 *
3356 * @returns Label ID. Throws VBox status code on failure, so no need to check
3357 * the return value.
3358 * @param pReNative The native recompile state.
3359 * @param enmType The label type.
3360 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3361 * label is not yet defined (default).
3362 * @param   uData       Data associated with the label. Only applicable to
3363 * certain type of labels. Default is zero.
3364 */
3365DECL_HIDDEN_THROW(uint32_t)
3366iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3367 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3368{
3369 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3370
3371 /*
3372 * Locate existing label definition.
3373 *
3374 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3375 * and uData is zero.
3376 */
3377 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3378 uint32_t const cLabels = pReNative->cLabels;
3379 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3380#ifndef VBOX_STRICT
3381 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3382 && offWhere == UINT32_MAX
3383 && uData == 0
3384#endif
3385 )
3386 {
3387#ifndef VBOX_STRICT
3388 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3389 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3390 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3391 if (idxLabel < pReNative->cLabels)
3392 return idxLabel;
3393#else
3394 for (uint32_t i = 0; i < cLabels; i++)
3395 if ( paLabels[i].enmType == enmType
3396 && paLabels[i].uData == uData)
3397 {
3398 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3399 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3400 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3401 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3402 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3403 return i;
3404 }
3405 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3406 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3407#endif
3408 }
3409
3410 /*
3411 * Make sure we've got room for another label.
3412 */
3413 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3414 { /* likely */ }
3415 else
3416 {
3417 uint32_t cNew = pReNative->cLabelsAlloc;
3418 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3419 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3420 cNew *= 2;
3421        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3422 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3423 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3424 pReNative->paLabels = paLabels;
3425 pReNative->cLabelsAlloc = cNew;
3426 }
3427
3428 /*
3429 * Define a new label.
3430 */
3431 paLabels[cLabels].off = offWhere;
3432 paLabels[cLabels].enmType = enmType;
3433 paLabels[cLabels].uData = uData;
3434 pReNative->cLabels = cLabels + 1;
3435
3436 Assert((unsigned)enmType < 64);
3437 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3438
3439 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3440 {
3441 Assert(uData == 0);
3442 pReNative->aidxUniqueLabels[enmType] = cLabels;
3443 }
3444
3445 if (offWhere != UINT32_MAX)
3446 {
3447#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3448 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3449 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3450#endif
3451 }
3452 return cLabels;
3453}
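
/*
 * Illustrative sketch of the forward-label workflow.  The emit helper and the
 * concrete label type used below are assumptions for the example, not a
 * prescription:
 *
 *      // Create the label without a position, reference it from a conditional
 *      // jump (which registers a fixup), and pin down its position once the
 *      // target code has been emitted.
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else);
 *      off = iemNativeEmitJccToLabel(pReNative, off, idxLabel, kIemNativeInstrCond_ne);
 *      ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */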
3454
3455
3456/**
3457 * Defines the location of an existing label.
3458 *
3459 * @param pReNative The native recompile state.
3460 * @param idxLabel The label to define.
3461 * @param offWhere The position.
3462 */
3463DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3464{
3465 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3466 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3467 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3468 pLabel->off = offWhere;
3469#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3470 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3471 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3472#endif
3473}
3474
3475
3476/**
3477 * Looks up a label.
3478 *
3479 * @returns Label ID if found, UINT32_MAX if not.
3480 */
3481static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3482 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3483{
3484 Assert((unsigned)enmType < 64);
3485 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3486 {
3487 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3488 return pReNative->aidxUniqueLabels[enmType];
3489
3490 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3491 uint32_t const cLabels = pReNative->cLabels;
3492 for (uint32_t i = 0; i < cLabels; i++)
3493 if ( paLabels[i].enmType == enmType
3494 && paLabels[i].uData == uData
3495 && ( paLabels[i].off == offWhere
3496 || offWhere == UINT32_MAX
3497 || paLabels[i].off == UINT32_MAX))
3498 return i;
3499 }
3500 return UINT32_MAX;
3501}
3502
3503
3504/**
3505 * Adds a fixup.
3506 *
3507 * @throws VBox status code (int) on failure.
3508 * @param pReNative The native recompile state.
3509 * @param offWhere The instruction offset of the fixup location.
3510 * @param idxLabel The target label ID for the fixup.
3511 * @param enmType The fixup type.
3512 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3513 */
3514DECL_HIDDEN_THROW(void)
3515iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3516 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3517{
3518 Assert(idxLabel <= UINT16_MAX);
3519 Assert((unsigned)enmType <= UINT8_MAX);
3520#ifdef RT_ARCH_ARM64
3521 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3522 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3523 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3524#endif
3525
3526 /*
3527      * Make sure we've got room.
3528 */
3529 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3530 uint32_t const cFixups = pReNative->cFixups;
3531 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3532 { /* likely */ }
3533 else
3534 {
3535 uint32_t cNew = pReNative->cFixupsAlloc;
3536 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3537 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3538 cNew *= 2;
3539 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3540 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3541 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3542 pReNative->paFixups = paFixups;
3543 pReNative->cFixupsAlloc = cNew;
3544 }
3545
3546 /*
3547 * Add the fixup.
3548 */
3549 paFixups[cFixups].off = offWhere;
3550 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3551 paFixups[cFixups].enmType = enmType;
3552 paFixups[cFixups].offAddend = offAddend;
3553 pReNative->cFixups = cFixups + 1;
3554}
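
/*
 * For orientation: a fixup records where a label reference was emitted so the
 * displacement can be patched in once all labels are defined.  A rough,
 * assumption-based sketch of resolving a 32-bit relative fixup (the real
 * resolver lives later in this file and handles more fixup types):
 *
 *      IEMNATIVEFIXUP const Fixup   = paFixups[i];
 *      int32_t        const offDisp = paLabels[Fixup.idxLabel].off - Fixup.off + Fixup.offAddend;
 *      // kIemNativeFixupType_Rel32: store offDisp at the fixup position in the code buffer.
 */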
3555
3556
3557/**
3558 * Slow code path for iemNativeInstrBufEnsure.
3559 */
3560DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3561{
3562 /* Double the buffer size till we meet the request. */
3563 uint32_t cNew = pReNative->cInstrBufAlloc;
3564 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3565 do
3566 cNew *= 2;
3567 while (cNew < off + cInstrReq);
3568
3569 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3570#ifdef RT_ARCH_ARM64
3571 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3572#else
3573 uint32_t const cbMaxInstrBuf = _2M;
3574#endif
3575 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3576
3577 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3578 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3579
3580#ifdef VBOX_STRICT
3581 pReNative->offInstrBufChecked = off + cInstrReq;
3582#endif
3583 pReNative->cInstrBufAlloc = cNew;
3584 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3585}
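
/*
 * Context note: the inline fast path (iemNativeInstrBufEnsure, declared in the
 * header) is assumed to simply check the current allocation and only drop into
 * the slow path above when the buffer must grow, roughly along these lines:
 *
 *      if (RT_LIKELY(off + (uint64_t)cInstrReq <= pReNative->cInstrBufAlloc))
 *          return pReNative->pInstrBuf;
 *      return iemNativeInstrBufEnsureSlow(pReNative, off, cInstrReq);
 */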
3586
3587#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3588
3589/**
3590 * Grows the static debug info array used during recompilation.
3591 *
3592 * @returns Pointer to the new debug info block; throws VBox status code on
3593 * failure, so no need to check the return value.
3594 */
3595DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3596{
3597 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3598 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3599 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3600 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3601 pReNative->pDbgInfo = pDbgInfo;
3602 pReNative->cDbgInfoAlloc = cNew;
3603 return pDbgInfo;
3604}
3605
3606
3607/**
3608 * Adds a new uninitialized debug info entry, returning the pointer to it.
3609 */
3610DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3611{
3612 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3613 { /* likely */ }
3614 else
3615 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3616 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3617}
3618
3619
3620/**
3621 * Debug Info: Adds a native offset record, if necessary.
3622 */
3623DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3624{
3625 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3626
3627 /*
3628 * Search backwards to see if we've got a similar record already.
3629 */
3630 uint32_t idx = pDbgInfo->cEntries;
3631 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3632 while (idx-- > idxStop)
3633 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3634 {
3635 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3636 return;
3637 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3638 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3639 break;
3640 }
3641
3642 /*
3643 * Add it.
3644 */
3645 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3646 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3647 pEntry->NativeOffset.offNative = off;
3648}
3649
3650
3651/**
3652 * Debug Info: Record info about a label.
3653 */
3654static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3655{
3656 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3657 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3658 pEntry->Label.uUnused = 0;
3659 pEntry->Label.enmLabel = (uint8_t)enmType;
3660 pEntry->Label.uData = uData;
3661}
3662
3663
3664/**
3665 * Debug Info: Record info about a threaded call.
3666 */
3667static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3668{
3669 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3670 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3671 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3672 pEntry->ThreadedCall.uUnused = 0;
3673 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3674}
3675
3676
3677/**
3678 * Debug Info: Record info about a new guest instruction.
3679 */
3680static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3681{
3682 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3683 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3684 pEntry->GuestInstruction.uUnused = 0;
3685 pEntry->GuestInstruction.fExec = fExec;
3686}
3687
3688
3689/**
3690 * Debug Info: Record info about guest register shadowing.
3691 */
3692DECL_HIDDEN_THROW(void)
3693iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3694 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3695{
3696 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3697 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3698 pEntry->GuestRegShadowing.uUnused = 0;
3699 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3700 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3701 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3702}
3703
3704
3705# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3706/**
3707 * Debug Info: Record info about guest SIMD register shadowing.
3708 */
3709DECL_HIDDEN_THROW(void)
3710iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3711 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3712{
3713 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3714 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3715 pEntry->GuestSimdRegShadowing.uUnused = 0;
3716 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3717 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3718 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3719}
3720# endif
3721
3722
3723# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3724/**
3725 * Debug Info: Record info about delayed RIP updates.
3726 */
3727DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3728{
3729 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3730 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3731 pEntry->DelayedPcUpdate.offPc = offPc;
3732 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3733}
3734# endif
3735
3736#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3737
3738
3739/*********************************************************************************************************************************
3740* Register Allocator *
3741*********************************************************************************************************************************/
3742
3743/**
3744 * Register parameter indexes (indexed by argument number).
3745 */
3746DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3747{
3748 IEMNATIVE_CALL_ARG0_GREG,
3749 IEMNATIVE_CALL_ARG1_GREG,
3750 IEMNATIVE_CALL_ARG2_GREG,
3751 IEMNATIVE_CALL_ARG3_GREG,
3752#if defined(IEMNATIVE_CALL_ARG4_GREG)
3753 IEMNATIVE_CALL_ARG4_GREG,
3754# if defined(IEMNATIVE_CALL_ARG5_GREG)
3755 IEMNATIVE_CALL_ARG5_GREG,
3756# if defined(IEMNATIVE_CALL_ARG6_GREG)
3757 IEMNATIVE_CALL_ARG6_GREG,
3758# if defined(IEMNATIVE_CALL_ARG7_GREG)
3759 IEMNATIVE_CALL_ARG7_GREG,
3760# endif
3761# endif
3762# endif
3763#endif
3764};
3765AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3766
3767/**
3768 * Call register masks indexed by argument count.
3769 */
3770DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3771{
3772 0,
3773 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3774 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3775 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3776 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3777 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3778#if defined(IEMNATIVE_CALL_ARG4_GREG)
3779 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3780 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3781# if defined(IEMNATIVE_CALL_ARG5_GREG)
3782 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3783 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3784# if defined(IEMNATIVE_CALL_ARG6_GREG)
3785 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3786 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3787 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3788# if defined(IEMNATIVE_CALL_ARG7_GREG)
3789 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3790 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3791 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3792# endif
3793# endif
3794# endif
3795#endif
3796};
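
/*
 * Hedged usage sketch: before emitting a helper call taking cArgs register
 * arguments, the allocator can consult these masks to see which argument
 * registers must be vacated first (the actual flushing helpers live elsewhere
 * in this file):
 *
 *      uint32_t const fArgRegs = g_afIemNativeCallRegs[cArgs];
 *      // Every host register in fArgRegs that currently holds a variable or a
 *      // guest shadow copy needs to be moved or spilled before the call.
 */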
3797
3798#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3799/**
3800 * BP offset of the stack argument slots.
3801 *
3802 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3803 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3804 */
3805DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3806{
3807 IEMNATIVE_FP_OFF_STACK_ARG0,
3808# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3809 IEMNATIVE_FP_OFF_STACK_ARG1,
3810# endif
3811# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3812 IEMNATIVE_FP_OFF_STACK_ARG2,
3813# endif
3814# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3815 IEMNATIVE_FP_OFF_STACK_ARG3,
3816# endif
3817};
3818AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3819#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3820
3821/**
3822 * Info about shadowed guest register values.
3823 * @see IEMNATIVEGSTREG
3824 */
3825DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3826{
3827#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3828 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3829 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3830 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3831 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3832 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3833 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3834 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3835 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3836 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3837 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3838 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3839 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3840 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3841 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3842 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3843 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3844 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3845 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3846 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3847 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3848 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3849 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3850 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3851 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3852 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3853 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3854 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3855 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3856 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3857 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3858 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3859 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3860 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3861 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3862 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3863 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3864 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3865 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3866 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3867 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3868 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3869 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3870 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3871 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3872 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3873 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3874 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3875 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3876#undef CPUMCTX_OFF_AND_SIZE
3877};
3878AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3879
3880
3881/** Host CPU general purpose register names. */
3882DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3883{
3884#ifdef RT_ARCH_AMD64
3885 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3886#elif defined(RT_ARCH_ARM64)
3887 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3888 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3889#else
3890# error "port me"
3891#endif
3892};
3893
3894
3895#if 0 /* unused */
3896/**
3897 * Tries to locate a suitable register in the given register mask.
3898 *
3899 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3900 * failed.
3901 *
3902 * @returns Host register number on success, returns UINT8_MAX on failure.
3903 */
3904static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3905{
3906 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3907 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3908 if (fRegs)
3909 {
3910 /** @todo pick better here: */
3911 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3912
3913 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3914 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3915 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3916 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3917
3918 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3919 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3920 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3921 return idxReg;
3922 }
3923 return UINT8_MAX;
3924}
3925#endif /* unused */
3926
3927
3928/**
3929 * Locate a register, possibly freeing one up.
3930 *
3931 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3932 * failed.
3933 *
3934 * @returns Host register number on success. Returns UINT8_MAX if no registers
3935 *          are found; the caller is supposed to deal with this and raise an
3936 *          allocation type specific status code (if desired).
3937 *
3938 * @throws  VBox status code if we run into trouble spilling a variable or
3939 *          recording debug info.  Does NOT throw anything if we're out of
3940 * registers, though.
3941 */
3942static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3943 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3944{
3945 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3946 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3947 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3948
3949 /*
3950 * Try a freed register that's shadowing a guest register.
3951 */
3952 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3953 if (fRegs)
3954 {
3955 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3956
3957#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3958 /*
3959         * When we have liveness information, we use it to kick out all shadowed
3960         * guest registers that will not be needed any more in this TB.  If we're
3961 * lucky, this may prevent us from ending up here again.
3962 *
3963 * Note! We must consider the previous entry here so we don't free
3964 * anything that the current threaded function requires (current
3965 * entry is produced by the next threaded function).
3966 */
3967 uint32_t const idxCurCall = pReNative->idxCurCall;
3968 if (idxCurCall > 0)
3969 {
3970 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3971
3972# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3973 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3974 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3975            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either the UNUSED or XCPT_OR_CALL state */
3976# else
3977 /* Construct a mask of the registers not in the read or write state.
3978               Note! We could skip writes, if they aren't from us, as this is just
3979 a hack to prevent trashing registers that have just been written
3980 or will be written when we retire the current instruction. */
3981 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3982 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3983 & IEMLIVENESSBIT_MASK;
3984# endif
3985 /* Merge EFLAGS. */
3986 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3987 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3988 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3989 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3990 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
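            /* Descriptive note (assumption about the liveness layout): the bits at and
               above kIemNativeGstReg_EFlags appear to track the individual EFLAGS groups
               (OF, SF, ZF, AF, PF, CF and "other") separately, while the shadow copy is a
               single host register.  The three shift-and-AND steps above fold those bits
               together, so the EFlags bit only remains set in fToFreeMask when every group
               is unused, i.e. when the whole register can safely be freed. */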
3991
3992 /* If it matches any shadowed registers. */
3993 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3994 {
3995 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3996 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3997 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3998
3999 /* See if we've got any unshadowed registers we can return now. */
4000 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4001 if (fUnshadowedRegs)
4002 {
4003 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4004 return (fPreferVolatile
4005 ? ASMBitFirstSetU32(fUnshadowedRegs)
4006 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4007 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4008 - 1;
4009 }
4010 }
4011 }
4012#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4013
4014 unsigned const idxReg = (fPreferVolatile
4015 ? ASMBitFirstSetU32(fRegs)
4016 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4017 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4018 - 1;
4019
4020 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4021 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4022 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4023 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4024
4025 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4026 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4027 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4028 return idxReg;
4029 }
4030
4031 /*
4032 * Try free up a variable that's in a register.
4033 *
4034     * We do two rounds here: first we evacuate variables that don't need to be
4035     * saved to the stack, then in the second round we move things to the stack.
4036 */
4037 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4038 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4039 {
4040 uint32_t fVars = pReNative->Core.bmVars;
4041 while (fVars)
4042 {
4043 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4044 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4045 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4046 && (RT_BIT_32(idxReg) & fRegMask)
4047 && ( iLoop == 0
4048 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4049 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4050 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4051 {
4052 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4053 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4054 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4055 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4056 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4057 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4058
4059 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4060 {
4061 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4062 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4063 }
4064
4065 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4066 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4067
4068 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4069 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4070 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4071 return idxReg;
4072 }
4073 fVars &= ~RT_BIT_32(idxVar);
4074 }
4075 }
4076
4077 return UINT8_MAX;
4078}
4079
4080
4081/**
4082 * Reassigns a variable to a different register specified by the caller.
4083 *
4084 * @returns The new code buffer position.
4085 * @param pReNative The native recompile state.
4086 * @param off The current code buffer position.
4087 * @param idxVar The variable index.
4088 * @param idxRegOld The old host register number.
4089 * @param idxRegNew The new host register number.
4090 * @param pszCaller The caller for logging.
4091 */
4092static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4093 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4094{
4095 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4096 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4097#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4098 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4099#endif
4100 RT_NOREF(pszCaller);
4101
4102 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4103
4104 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4105 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4106 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4107 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4108
4109 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4110 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4111 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4112 if (fGstRegShadows)
4113 {
4114 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4115 | RT_BIT_32(idxRegNew);
4116 while (fGstRegShadows)
4117 {
4118 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4119 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4120
4121 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4122 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4123 }
4124 }
4125
4126 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4127 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4128 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4129 return off;
4130}
4131
4132
4133/**
4134 * Moves a variable to a different register or spills it onto the stack.
4135 *
4136 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4137 * kinds can easily be recreated if needed later.
4138 *
4139 * @returns The new code buffer position.
4140 * @param pReNative The native recompile state.
4141 * @param off The current code buffer position.
4142 * @param idxVar The variable index.
4143 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4144 * call-volatile registers.
4145 */
4146DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4147 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4148{
4149 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4150 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4151 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4152 Assert(!pVar->fRegAcquired);
4153
4154 uint8_t const idxRegOld = pVar->idxReg;
4155 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4156 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4157 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4158 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4159 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4160 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4161 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4162 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4163
4164
4165 /** @todo Add statistics on this.*/
4166 /** @todo Implement basic variable liveness analysis (python) so variables
4167     * can be freed immediately once no longer used.  Without it we risk trashing
4168     * registers and stack space for dead variables.
4169 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4170
4171 /*
4172 * First try move it to a different register, as that's cheaper.
4173 */
4174 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4175 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4176 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4177 if (fRegs)
4178 {
4179 /* Avoid using shadow registers, if possible. */
4180 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4181 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4182 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4183 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4184 }
4185
4186 /*
4187 * Otherwise we must spill the register onto the stack.
4188 */
4189 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4190 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4191 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4192 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4193
4194 pVar->idxReg = UINT8_MAX;
4195 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4196 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4197 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4198 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4199 return off;
4200}
4201
4202
4203/**
4204 * Allocates a temporary host general purpose register.
4205 *
4206 * This may emit code to save register content onto the stack in order to free
4207 * up a register.
4208 *
4209 * @returns The host register number; throws VBox status code on failure,
4210 * so no need to check the return value.
4211 * @param pReNative The native recompile state.
4212 * @param poff Pointer to the variable with the code buffer position.
4213 * This will be update if we need to move a variable from
4214 *                          This will be updated if we need to move a variable from
4215 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4216 * registers (@c true, default) or the other way around
4217 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4218 */
4219DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4220{
4221 /*
4222 * Try find a completely unused register, preferably a call-volatile one.
4223 */
4224 uint8_t idxReg;
4225 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4226 & ~pReNative->Core.bmHstRegsWithGstShadow
4227 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4228 if (fRegs)
4229 {
4230 if (fPreferVolatile)
4231 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4232 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4233 else
4234 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4235 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4236 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4237 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4238 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4239 }
4240 else
4241 {
4242 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4243 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4244 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4245 }
4246 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4247}
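
/*
 * Illustrative usage sketch (hedged: iemNativeRegFreeTmp is assumed to be the
 * matching release helper and the immediate below is an arbitrary example):
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));
 *      ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */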
4248
4249
4250/**
4251 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4252 * registers.
4253 *
4254 * @returns The host register number; throws VBox status code on failure,
4255 * so no need to check the return value.
4256 * @param pReNative The native recompile state.
4257 * @param poff Pointer to the variable with the code buffer position.
4258 *                          This will be updated if we need to move a variable from
4259 * register to stack in order to satisfy the request.
4260 * @param fRegMask Mask of acceptable registers.
4261 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4262 * registers (@c true, default) or the other way around
4263 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4264 */
4265DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4266 bool fPreferVolatile /*= true*/)
4267{
4268 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4269 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4270
4271 /*
4272 * Try find a completely unused register, preferably a call-volatile one.
4273 */
4274 uint8_t idxReg;
4275 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4276 & ~pReNative->Core.bmHstRegsWithGstShadow
4277 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4278 & fRegMask;
4279 if (fRegs)
4280 {
4281 if (fPreferVolatile)
4282 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4283 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4284 else
4285 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4286 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4287 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4288 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4289 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4290 }
4291 else
4292 {
4293 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4294 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4295 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4296 }
4297 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4298}
4299
4300
4301/**
4302 * Allocates a temporary register for loading an immediate value into.
4303 *
4304 * This will emit code to load the immediate, unless there happens to be an
4305 * unused register with the value already loaded.
4306 *
4307 * The caller will not modify the returned register, it must be considered
4308 * read-only. Free using iemNativeRegFreeTmpImm.
4309 *
4310 * @returns The host register number; throws VBox status code on failure, so no
4311 * need to check the return value.
4312 * @param pReNative The native recompile state.
4313 * @param poff Pointer to the variable with the code buffer position.
4314 * @param uImm The immediate value that the register must hold upon
4315 * return.
4316 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4317 * registers (@c true, default) or the other way around
4318 * (@c false).
4319 *
4320 * @note Reusing immediate values has not been implemented yet.
4321 */
4322DECL_HIDDEN_THROW(uint8_t)
4323iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4324{
4325 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4326 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4327 return idxReg;
4328}
4329
4330
4331/**
4332 * Allocates a temporary host general purpose register for keeping a guest
4333 * register value.
4334 *
4335 * Since we may already have a register holding the guest register value,
4336 * code will be emitted to do the loading if that's not the case. Code may also
4337 * be emitted if we have to free up a register to satisfy the request.
4338 *
4339 * @returns The host register number; throws VBox status code on failure, so no
4340 * need to check the return value.
4341 * @param pReNative The native recompile state.
4342 * @param poff Pointer to the variable with the code buffer
4343 *                          position. This will be updated if we need to move a
4344 * variable from register to stack in order to satisfy
4345 * the request.
4346 * @param   enmGstReg       The guest register that is to be updated.
4347 * @param enmIntendedUse How the caller will be using the host register.
4348 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4349 * register is okay (default). The ASSUMPTION here is
4350 * that the caller has already flushed all volatile
4351 * registers, so this is only applied if we allocate a
4352 * new register.
4353 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4354 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4355 */
4356DECL_HIDDEN_THROW(uint8_t)
4357iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4358 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4359 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4360{
4361 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4362#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4363 AssertMsg( fSkipLivenessAssert
4364 || pReNative->idxCurCall == 0
4365 || enmGstReg == kIemNativeGstReg_Pc
4366 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4367 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4368 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4369 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4370 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4371 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4372#endif
4373 RT_NOREF(fSkipLivenessAssert);
4374#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4375 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4376#endif
4377 uint32_t const fRegMask = !fNoVolatileRegs
4378 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4379 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4380
4381 /*
4382 * First check if the guest register value is already in a host register.
4383 */
4384 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4385 {
4386 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4387 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4388 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4389 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4390
4391 /* It's not supposed to be allocated... */
4392 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4393 {
4394 /*
4395 * If the register will trash the guest shadow copy, try find a
4396 * completely unused register we can use instead. If that fails,
4397 * we need to disassociate the host reg from the guest reg.
4398 */
4399 /** @todo would be nice to know if preserving the register is in any way helpful. */
4400 /* If the purpose is calculations, try duplicate the register value as
4401 we'll be clobbering the shadow. */
4402 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4403 && ( ~pReNative->Core.bmHstRegs
4404 & ~pReNative->Core.bmHstRegsWithGstShadow
4405 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4406 {
4407 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4408
4409 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4410
4411 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4412 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4413 g_apszIemNativeHstRegNames[idxRegNew]));
4414 idxReg = idxRegNew;
4415 }
4416 /* If the current register matches the restrictions, go ahead and allocate
4417 it for the caller. */
4418 else if (fRegMask & RT_BIT_32(idxReg))
4419 {
4420 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4421 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4422 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4423 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4424 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4425 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4426 else
4427 {
4428 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4429 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4430 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4431 }
4432 }
4433 /* Otherwise, allocate a register that satisfies the caller and transfer
4434 the shadowing if compatible with the intended use. (This basically
4435 means the call wants a non-volatile register (RSP push/pop scenario).) */
4436 else
4437 {
4438 Assert(fNoVolatileRegs);
4439 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4440 !fNoVolatileRegs
4441 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4442 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4443 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4444 {
4445 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4446 Log12(("iemNativeRegAllocTmpForGuestReg: Transfering %s to %s for guest %s %s\n",
4447 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4448 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4449 }
4450 else
4451 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4452 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4453 g_apszIemNativeHstRegNames[idxRegNew]));
4454 idxReg = idxRegNew;
4455 }
4456 }
4457 else
4458 {
4459 /*
4460 * Oops. Shadowed guest register already allocated!
4461 *
4462 * Allocate a new register, copy the value and, if updating, the
4463 * guest shadow copy assignment to the new register.
4464 */
4465 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4466 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4467 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4468 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4469
4470 /** @todo share register for readonly access. */
4471 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4472 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4473
4474 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4475 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4476
4477 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4478 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4479 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4480 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4481 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4482 else
4483 {
4484 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4485 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4486 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4487 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4488 }
4489 idxReg = idxRegNew;
4490 }
4491 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4492
4493#ifdef VBOX_STRICT
4494 /* Strict builds: Check that the value is correct. */
4495 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4496#endif
4497
4498 return idxReg;
4499 }
4500
4501 /*
4502     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4503 */
4504 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4505
4506 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4507 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4508
4509 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4510 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4511 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4512 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4513
4514 return idxRegNew;
4515}
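
/*
 * Illustrative usage sketch (editorial addition, not part of the original source):
 * how a native emitter would typically pull a guest register into a host register
 * via iemNativeRegAllocTmpForGuestReg(), use it, and hand it back.  The choice of
 * guest register (PC) and the surrounding emitter context are assumptions made
 * purely for the example.
 */
#if 0 /* usage sketch, not compiled */
/* Inside a native emitter, with pReNative and off in scope: */
uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                         kIemNativeGstRegUse_ReadOnly);
/* ... emit code that only reads idxPcReg ... */

/* Release the host register; the guest shadow association is kept so later
   code can reuse the already loaded value without reloading it. */
iemNativeRegFreeTmp(pReNative, idxPcReg);
#endif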
4516
4517
4518/**
4519 * Allocates a temporary host general purpose register that already holds the
4520 * given guest register value.
4521 *
4522 * The use case for this function is places where the shadowing state cannot be
4523 * modified due to branching and such. This will fail if we don't have a
4524 * current shadow copy handy or if it's incompatible. The only code that will
4525 * be emitted here is value checking code in strict builds.
4526 *
4527 * The intended use can only be readonly!
4528 *
4529 * @returns The host register number, UINT8_MAX if not present.
4530 * @param pReNative The native recompile state.
4531 * @param poff Pointer to the instruction buffer offset.
4532 * Will be updated in strict builds if a register is
4533 * found.
4534 * @param   enmGstReg   The guest register that is to be fetched.
4535 * @note In strict builds, this may throw instruction buffer growth failures.
4536 * Non-strict builds will not throw anything.
4537 * @sa iemNativeRegAllocTmpForGuestReg
4538 */
4539DECL_HIDDEN_THROW(uint8_t)
4540iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4541{
4542 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4543#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4544 AssertMsg( pReNative->idxCurCall == 0
4545 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4546 || enmGstReg == kIemNativeGstReg_Pc,
4547 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4548#endif
4549
4550 /*
4551 * First check if the guest register value is already in a host register.
4552 */
4553 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4554 {
4555 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4556 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4557 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4558 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4559
4560 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4561 {
4562 /*
4563 * We only do readonly use here, so easy compared to the other
4564 * variant of this code.
4565 */
4566 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4567 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4568 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4569 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4570 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4571
4572#ifdef VBOX_STRICT
4573 /* Strict builds: Check that the value is correct. */
4574 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4575#else
4576 RT_NOREF(poff);
4577#endif
4578 return idxReg;
4579 }
4580 }
4581
4582 return UINT8_MAX;
4583}
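
/*
 * Illustrative usage sketch (editorial addition, not from the original source):
 * probing for an existing shadow copy on a code path where the shadowing state
 * must not change, and falling back when UINT8_MAX is returned.  The guest
 * register choice is an assumption made for the example.
 */
#if 0 /* usage sketch, not compiled */
uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
if (idxPcReg != UINT8_MAX)
{
    /* ... emit code that only reads idxPcReg ... */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
}
else
{
    /* No usable shadow copy; the caller must get at the value some other way
       without modifying the allocator state. */
}
#endif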
4584
4585
4586/**
4587 * Allocates argument registers for a function call.
4588 *
4589 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4590 * need to check the return value.
4591 * @param pReNative The native recompile state.
4592 * @param off The current code buffer offset.
4593 * @param cArgs The number of arguments the function call takes.
4594 */
4595DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4596{
4597 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4598 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4599 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4600 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4601
4602 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4603 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4604 else if (cArgs == 0)
4605        return off;
4606
4607 /*
4608     * Do we get lucky and all registers are free and not shadowing anything?
4609 */
4610 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4611 for (uint32_t i = 0; i < cArgs; i++)
4612 {
4613 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4614 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4615 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4616 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4617 }
4618 /*
4619 * Okay, not lucky so we have to free up the registers.
4620 */
4621 else
4622 for (uint32_t i = 0; i < cArgs; i++)
4623 {
4624 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4625 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4626 {
4627 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4628 {
4629 case kIemNativeWhat_Var:
4630 {
4631 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4632 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4633 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4634 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4635 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4636#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4637 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4638#endif
4639
4640 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4641 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4642 else
4643 {
4644 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4645 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4646 }
4647 break;
4648 }
4649
4650 case kIemNativeWhat_Tmp:
4651 case kIemNativeWhat_Arg:
4652 case kIemNativeWhat_rc:
4653 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4654 default:
4655 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4656 }
4657
4658 }
4659 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4660 {
4661 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4662 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4663 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4664 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4665 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4666 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4667 }
4668 else
4669 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4670 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4671 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4672 }
4673 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4674    return off;
4675}
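
/*
 * Illustrative usage sketch (editorial addition, not from the original source):
 * reserving the first two call argument registers before loading them and
 * emitting the call.  The argument count and surrounding emitter context are
 * assumptions made for the example.
 */
#if 0 /* usage sketch, not compiled */
off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
/* ... load g_aidxIemNativeCallRegs[0] and g_aidxIemNativeCallRegs[1] with the
   actual argument values and emit the call itself ... */
#endif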
4676
4677
4678DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4679
4680
4681#if 0
4682/**
4683 * Frees a register assignment of any type.
4684 *
4685 * @param pReNative The native recompile state.
4686 * @param idxHstReg The register to free.
4687 *
4688 * @note Does not update variables.
4689 */
4690DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4691{
4692 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4693 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4694 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4695 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4696 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4697 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4698 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4699 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4700 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4701 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4702 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4703 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4704 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4705 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4706
4707 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4708 /* no flushing, right:
4709 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4710 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4711 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4712 */
4713}
4714#endif
4715
4716
4717/**
4718 * Frees a temporary register.
4719 *
4720 * Any shadow copies of guest registers assigned to the host register will not
4721 * be flushed by this operation.
4722 */
4723DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4724{
4725 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4726 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4727 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4728 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4729 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4730}
4731
4732
4733/**
4734 * Frees a temporary immediate register.
4735 *
4736 * It is assumed that the call has not modified the register, so it still holds
4737 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4738 */
4739DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4740{
4741 iemNativeRegFreeTmp(pReNative, idxHstReg);
4742}
4743
4744
4745/**
4746 * Frees a register assigned to a variable.
4747 *
4748 * The register will be disassociated from the variable.
4749 */
4750DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4751{
4752 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4753 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4754 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4755 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4756 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4757#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4758 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4759#endif
4760
4761 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4762 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4763 if (!fFlushShadows)
4764 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4765 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4766 else
4767 {
4768 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4769 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4770 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4771 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4772 uint64_t fGstRegShadows = fGstRegShadowsOld;
4773 while (fGstRegShadows)
4774 {
4775 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4776 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4777
4778 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4779 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4780 }
4781 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4782 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4783 }
4784}
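
/*
 * Illustrative usage sketch (editorial addition, not from the original source):
 * the two fFlushShadows modes.  idxVarReg stands for a host register previously
 * assigned to a variable; it is an assumption made for the example.
 */
#if 0 /* usage sketch, not compiled */
/* The variable is done with its register, but any guest shadow copies in it
   are still valid and may be reused by later code: */
iemNativeRegFreeVar(pReNative, idxVarReg, false /*fFlushShadows*/);

/* The register content no longer matches the guest register(s) it shadowed,
   so drop the shadow associations as well: */
iemNativeRegFreeVar(pReNative, idxVarReg, true /*fFlushShadows*/);
#endif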
4785
4786
4787#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4788# ifdef LOG_ENABLED
4789/** Host CPU SIMD register names. */
4790DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4791{
4792# ifdef RT_ARCH_AMD64
4793 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4794# elif defined(RT_ARCH_ARM64)
4795 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4796 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4797# else
4798# error "port me"
4799# endif
4800};
4801# endif
4802
4803
4804/**
4805 * Frees a SIMD register assigned to a variable.
4806 *
4807 * The register will be disassociated from the variable.
4808 */
4809DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4810{
4811 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4812 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4813 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4814 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4815 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4816 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4817
4818 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4819 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4820 if (!fFlushShadows)
4821 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4822 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4823 else
4824 {
4825 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4826 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4827 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4828 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4829 uint64_t fGstRegShadows = fGstRegShadowsOld;
4830 while (fGstRegShadows)
4831 {
4832 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4833 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4834
4835 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4836 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4837 }
4838 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4839 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4840 }
4841}
4842#endif
4843
4844
4845/**
4846 * Called right before emitting a call instruction to move anything important
4847 * out of call-volatile registers, free and flush the call-volatile registers,
4848 * optionally freeing argument variables.
4849 *
4850 * @returns New code buffer offset, UINT32_MAX on failure.
4851 * @param pReNative The native recompile state.
4852 * @param off The code buffer offset.
4853 * @param cArgs The number of arguments the function call takes.
4854 * It is presumed that the host register part of these have
4855 *                      It is presumed that the host register part of these has
4856 * just freeing.
4857 * @param fKeepVars Mask of variables that should keep their register
4858 * assignments. Caller must take care to handle these.
4859 */
4860DECL_HIDDEN_THROW(uint32_t)
4861iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4862{
4863 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4864
4865 /* fKeepVars will reduce this mask. */
4866 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4867
4868 /*
4869 * Move anything important out of volatile registers.
4870 */
4871 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4872 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4873 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4874#ifdef IEMNATIVE_REG_FIXED_TMP0
4875 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4876#endif
4877#ifdef IEMNATIVE_REG_FIXED_TMP1
4878 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4879#endif
4880#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4881 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4882#endif
4883 & ~g_afIemNativeCallRegs[cArgs];
4884
4885 fRegsToMove &= pReNative->Core.bmHstRegs;
4886 if (!fRegsToMove)
4887 { /* likely */ }
4888 else
4889 {
4890 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4891 while (fRegsToMove != 0)
4892 {
4893 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4894 fRegsToMove &= ~RT_BIT_32(idxReg);
4895
4896 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4897 {
4898 case kIemNativeWhat_Var:
4899 {
4900 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4901 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4902 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4903 Assert(pVar->idxReg == idxReg);
4904 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4905 {
4906 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4907 idxVar, pVar->enmKind, pVar->idxReg));
4908 if (pVar->enmKind != kIemNativeVarKind_Stack)
4909 pVar->idxReg = UINT8_MAX;
4910 else
4911 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4912 }
4913 else
4914 fRegsToFree &= ~RT_BIT_32(idxReg);
4915 continue;
4916 }
4917
4918 case kIemNativeWhat_Arg:
4919 AssertMsgFailed(("What?!?: %u\n", idxReg));
4920 continue;
4921
4922 case kIemNativeWhat_rc:
4923 case kIemNativeWhat_Tmp:
4924 AssertMsgFailed(("Missing free: %u\n", idxReg));
4925 continue;
4926
4927 case kIemNativeWhat_FixedTmp:
4928 case kIemNativeWhat_pVCpuFixed:
4929 case kIemNativeWhat_pCtxFixed:
4930 case kIemNativeWhat_PcShadow:
4931 case kIemNativeWhat_FixedReserved:
4932 case kIemNativeWhat_Invalid:
4933 case kIemNativeWhat_End:
4934 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4935 }
4936 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4937 }
4938 }
4939
4940 /*
4941 * Do the actual freeing.
4942 */
4943 if (pReNative->Core.bmHstRegs & fRegsToFree)
4944 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4945 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4946 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4947
4948 /* If there are guest register shadows in any call-volatile register, we
4949       have to clear the corresponding guest register masks for each register. */
4950 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4951 if (fHstRegsWithGstShadow)
4952 {
4953 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4954 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4955 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4956 do
4957 {
4958 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4959 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4960
4961 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4962 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4963 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4964 } while (fHstRegsWithGstShadow != 0);
4965 }
4966
4967 return off;
4968}
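
/*
 * Illustrative usage sketch (editorial addition, not from the original source):
 * preparing for a helper call while letting one variable keep its register.
 * The variable index and argument count are assumptions made for the example;
 * fKeepVars is a bitmask of unpacked variable indexes.
 */
#if 0 /* usage sketch, not compiled */
/* Spill/evacuate everything living in call-volatile registers, except the
   registers already allocated as the two call arguments and the register
   backing variable #2, which the caller handles manually afterwards: */
off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/, RT_BIT_32(2) /*fKeepVars*/);
/* ... emit the call ... */
#endif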
4969
4970
4971/**
4972 * Flushes a set of guest register shadow copies.
4973 *
4974 * This is usually done after calling a threaded function or a C-implementation
4975 * of an instruction.
4976 *
4977 * @param pReNative The native recompile state.
4978 * @param fGstRegs Set of guest registers to flush.
4979 */
4980DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4981{
4982 /*
4983 * Reduce the mask by what's currently shadowed
4984 */
4985 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4986 fGstRegs &= bmGstRegShadowsOld;
4987 if (fGstRegs)
4988 {
4989 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4990 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4991 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4992 if (bmGstRegShadowsNew)
4993 {
4994 /*
4995 * Partial.
4996 */
4997 do
4998 {
4999 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5000 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5001 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5002 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5003 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5004
5005 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5006 fGstRegs &= ~fInThisHstReg;
5007 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5008 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5009 if (!fGstRegShadowsNew)
5010 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5011 } while (fGstRegs != 0);
5012 }
5013 else
5014 {
5015 /*
5016 * Clear all.
5017 */
5018 do
5019 {
5020 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5021 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5022 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5023 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5024 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5025
5026 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5027 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5028 } while (fGstRegs != 0);
5029 pReNative->Core.bmHstRegsWithGstShadow = 0;
5030 }
5031 }
5032}
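
/*
 * Illustrative usage sketch (editorial addition, not from the original source):
 * dropping shadow copies after something may have changed the guest state
 * behind the recompiler's back.  The register choice is an assumption.
 */
#if 0 /* usage sketch, not compiled */
/* Forget the (possibly stale) shadow copy of the guest PC only: */
iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));

/* Or forget every guest register shadow currently held in host registers: */
iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
#endif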
5033
5034
5035/**
5036 * Flushes guest register shadow copies held by a set of host registers.
5037 *
5038 * This is used with the TLB lookup code for ensuring that we don't carry on
5039 * with any guest shadows in volatile registers, as these will get corrupted by
5040 * a TLB miss.
5041 *
5042 * @param pReNative The native recompile state.
5043 * @param fHstRegs Set of host registers to flush guest shadows for.
5044 */
5045DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5046{
5047 /*
5048 * Reduce the mask by what's currently shadowed.
5049 */
5050 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5051 fHstRegs &= bmHstRegsWithGstShadowOld;
5052 if (fHstRegs)
5053 {
5054 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5055 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5056 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5057 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5058 if (bmHstRegsWithGstShadowNew)
5059 {
5060 /*
5061 * Partial (likely).
5062 */
5063 uint64_t fGstShadows = 0;
5064 do
5065 {
5066 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5067 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5068 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5069 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5070
5071 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5072 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5073 fHstRegs &= ~RT_BIT_32(idxHstReg);
5074 } while (fHstRegs != 0);
5075 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5076 }
5077 else
5078 {
5079 /*
5080 * Clear all.
5081 */
5082 do
5083 {
5084 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5085 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5086 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5087 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5088
5089 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5090 fHstRegs &= ~RT_BIT_32(idxHstReg);
5091 } while (fHstRegs != 0);
5092 pReNative->Core.bmGstRegShadows = 0;
5093 }
5094 }
5095}
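
/*
 * Illustrative usage sketch (editorial addition, not from the original source):
 * the TLB-lookup use mentioned above, dropping any guest shadows that live in
 * call-volatile host registers before a possible helper call clobbers them.
 */
#if 0 /* usage sketch, not compiled */
iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif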
5096
5097
5098/**
5099 * Restores guest shadow copies in volatile registers.
5100 *
5101 * This is used after calling a helper function (think TLB miss) to restore the
5102 * register state of volatile registers.
5103 *
5104 * @param pReNative The native recompile state.
5105 * @param off The code buffer offset.
5106 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5107 * be active (allocated) w/o asserting. Hack.
5108 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5109 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5110 */
5111DECL_HIDDEN_THROW(uint32_t)
5112iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5113{
5114 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5115 if (fHstRegs)
5116 {
5117 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5118 do
5119 {
5120 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5121
5122 /* It's not fatal if a register is active holding a variable that
5123               is shadowing a guest register, ASSUMING all pending guest register
5124 writes were flushed prior to the helper call. However, we'll be
5125               emitting duplicate restores, so it wastes code space. */
5126 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5127 RT_NOREF(fHstRegsActiveShadows);
5128
5129 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5130 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5131 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5132 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5133
5134 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5135 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5136
5137 fHstRegs &= ~RT_BIT_32(idxHstReg);
5138 } while (fHstRegs != 0);
5139 }
5140 return off;
5141}
5142
5143
5144
5145
5146/*********************************************************************************************************************************
5147* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5148*********************************************************************************************************************************/
5149#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5150
5151/**
5152 * Info about shadowed guest SIMD register values.
5153 * @see IEMNATIVEGSTSIMDREG
5154 */
5155static struct
5156{
5157 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5158 uint32_t offXmm;
5159 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5160 uint32_t offYmm;
5161 /** Name (for logging). */
5162 const char *pszName;
5163} const g_aGstSimdShadowInfo[] =
5164{
5165#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5166 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5167 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5168 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5169 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5170 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5171 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5172 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5173 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5174 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5175 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5176 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5177 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5178 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5179 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5180 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5181 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5182 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5183#undef CPUMCTX_OFF_AND_SIZE
5184};
5185AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5186
5187
5188/**
5189 * Frees a temporary SIMD register.
5190 *
5191 * Any shadow copies of guest registers assigned to the host register will not
5192 * be flushed by this operation.
5193 */
5194DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5195{
5196 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5197 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5198 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5199 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5200 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5201}
5202
5203
5204/**
5205 * Emits code to flush a pending write of the given guest SIMD register, if any, clearing its dirty state.
5206 *
5207 * @returns New code buffer offset.
5208 * @param pReNative The native recompile state.
5209 * @param off Current code buffer position.
5210 * @param enmGstSimdReg The guest SIMD register to flush.
5211 */
5212DECL_HIDDEN_THROW(uint32_t)
5213iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5214{
5215 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5216
5217 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5218 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5219 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5220 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5221
5222 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5223 {
5224 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5225 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5226 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5227 }
5228
5229 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5230 {
5231 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5232 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5233 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5234 }
5235
5236 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5237 return off;
5238}
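
/*
 * Illustrative usage sketch (editorial addition, not from the original source):
 * writing back a dirty ymm0 shadow before code that needs the CPUMCTX copy to
 * be current.  It is assumed here that ymm0 is currently shadowed.
 */
#if 0 /* usage sketch, not compiled */
off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
#endif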
5239
5240
5241/**
5242 * Locate a register, possibly freeing one up.
5243 *
5244 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5245 * failed.
5246 *
5247 * @returns Host register number on success. Returns UINT8_MAX if no registers
5248 * found, the caller is supposed to deal with this and raise a
5249 *          were found, the caller is supposed to deal with this and raise an
5250 *
5251 * @throws  VBox status code if we run into trouble spilling a variable or
5252 * recording debug info. Does NOT throw anything if we're out of
5253 * registers, though.
5254 */
5255static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5256 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5257{
5258 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5259 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5260 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5261
5262 /*
5263 * Try a freed register that's shadowing a guest register.
5264 */
5265 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5266 if (fRegs)
5267 {
5268 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5269
5270#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5271 /*
5272         * When we have liveness information, we use it to kick out all shadowed
5273         * guest registers that will not be needed any more in this TB. If we're
5274 * lucky, this may prevent us from ending up here again.
5275 *
5276 * Note! We must consider the previous entry here so we don't free
5277 * anything that the current threaded function requires (current
5278 * entry is produced by the next threaded function).
5279 */
5280 uint32_t const idxCurCall = pReNative->idxCurCall;
5281 if (idxCurCall > 0)
5282 {
5283 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5284
5285# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5286 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5287 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5288            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state. */
5289#else
5290 /* Construct a mask of the registers not in the read or write state.
5291               Note! We could skip writes, if they aren't from us, as this is just
5292 a hack to prevent trashing registers that have just been written
5293 or will be written when we retire the current instruction. */
5294 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5295 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5296 & IEMLIVENESSBIT_MASK;
5297#endif
5298 /* If it matches any shadowed registers. */
5299 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5300 {
5301 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5302 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5303 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5304
5305 /* See if we've got any unshadowed registers we can return now. */
5306 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5307 if (fUnshadowedRegs)
5308 {
5309 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5310 return (fPreferVolatile
5311 ? ASMBitFirstSetU32(fUnshadowedRegs)
5312 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5313 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5314 - 1;
5315 }
5316 }
5317 }
5318#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5319
5320 unsigned const idxReg = (fPreferVolatile
5321 ? ASMBitFirstSetU32(fRegs)
5322 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5323 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5324 - 1;
5325
5326 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5327 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5328 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5329 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5330
5331 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5332 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5333 uint32_t idxGstSimdReg = 0;
5334 do
5335 {
5336 if (fGstRegShadows & 0x1)
5337 {
5338 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5339 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5340 }
5341 idxGstSimdReg++;
5342 fGstRegShadows >>= 1;
5343 } while (fGstRegShadows);
5344
5345 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5346 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5347 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5348 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5349 return idxReg;
5350 }
5351
5352 /*
5353 * Try free up a variable that's in a register.
5354 *
5355 * We do two rounds here, first evacuating variables we don't need to be
5356 * saved on the stack, then in the second round move things to the stack.
5357 */
5358 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5359 AssertReleaseFailed(); /** @todo No variable support right now. */
5360#if 0
5361 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5362 {
5363 uint32_t fVars = pReNative->Core.bmSimdVars;
5364 while (fVars)
5365 {
5366 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5367 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5368 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5369 && (RT_BIT_32(idxReg) & fRegMask)
5370 && ( iLoop == 0
5371 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5372 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5373 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5374 {
5375 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5376 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5377 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5378 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5379 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5380 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5381
5382 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5383 {
5384 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5385 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5386 }
5387
5388 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5389 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5390
5391 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5392 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5393 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5394 return idxReg;
5395 }
5396 fVars &= ~RT_BIT_32(idxVar);
5397 }
5398 }
5399#endif
5400
5401 AssertFailed();
5402 return UINT8_MAX;
5403}
5404
5405
5406/**
5407 * Flushes a set of guest register shadow copies.
5408 *
5409 * This is usually done after calling a threaded function or a C-implementation
5410 * of an instruction.
5411 *
5412 * @param pReNative The native recompile state.
5413 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5414 */
5415DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5416{
5417 /*
5418 * Reduce the mask by what's currently shadowed
5419 */
5420 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5421 fGstSimdRegs &= bmGstSimdRegShadows;
5422 if (fGstSimdRegs)
5423 {
5424 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5425 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5426 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5427 if (bmGstSimdRegShadowsNew)
5428 {
5429 /*
5430 * Partial.
5431 */
5432 do
5433 {
5434 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5435 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5436 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5437 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5438 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5439 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5440
5441 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5442 fGstSimdRegs &= ~fInThisHstReg;
5443 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5444 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5445 if (!fGstRegShadowsNew)
5446 {
5447 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5448 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5449 }
5450 } while (fGstSimdRegs != 0);
5451 }
5452 else
5453 {
5454 /*
5455 * Clear all.
5456 */
5457 do
5458 {
5459 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5460 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5461 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5462 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5463 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5464 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5465
5466 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5467 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5468 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5469 } while (fGstSimdRegs != 0);
5470 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5471 }
5472 }
5473}
5474
5475
5476/**
5477 * Allocates a temporary host SIMD register.
5478 *
5479 * This may emit code to save register content onto the stack in order to free
5480 * up a register.
5481 *
5482 * @returns The host register number; throws VBox status code on failure,
5483 * so no need to check the return value.
5484 * @param pReNative The native recompile state.
5485 * @param poff Pointer to the variable with the code buffer position.
5486 * This will be update if we need to move a variable from
5487 *                          This will be updated if we need to move a variable from
5488 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5489 * registers (@c true, default) or the other way around
5490 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5491 */
5492DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5493{
5494 /*
5495 * Try find a completely unused register, preferably a call-volatile one.
5496 */
5497 uint8_t idxSimdReg;
5498 uint32_t fRegs = ~pReNative->Core.bmHstRegs
5499 & ~pReNative->Core.bmHstRegsWithGstShadow
5500 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5501 if (fRegs)
5502 {
5503 if (fPreferVolatile)
5504 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5505 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5506 else
5507 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5508 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5509 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5510 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5511 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5512 }
5513 else
5514 {
5515 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5516 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5517 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5518 }
5519
5520 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5521 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5522}
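
/*
 * Illustrative usage sketch (editorial addition, not from the original source):
 * grabbing a scratch SIMD register for intermediate results and handing it
 * back when done.  The surrounding emitter context is assumed.
 */
#if 0 /* usage sketch, not compiled */
uint8_t const idxTmpSimdReg = iemNativeSimdRegAllocTmp(pReNative, &off);
/* ... emit SIMD code using idxTmpSimdReg ... */
iemNativeSimdRegFreeTmp(pReNative, idxTmpSimdReg);
#endif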
5523
5524
5525/**
5526 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5527 * registers.
5528 *
5529 * @returns The host register number; throws VBox status code on failure,
5530 * so no need to check the return value.
5531 * @param pReNative The native recompile state.
5532 * @param poff Pointer to the variable with the code buffer position.
5533 *                          This will be updated if we need to move a variable from
5534 * register to stack in order to satisfy the request.
5535 * @param fRegMask Mask of acceptable registers.
5536 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5537 * registers (@c true, default) or the other way around
5538 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5539 */
5540DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5541 bool fPreferVolatile /*= true*/)
5542{
5543 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5544 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5545
5546 /*
5547 * Try find a completely unused register, preferably a call-volatile one.
5548 */
5549 uint8_t idxSimdReg;
5550 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5551 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5552 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5553 & fRegMask;
5554 if (fRegs)
5555 {
5556 if (fPreferVolatile)
5557 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5558 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5559 else
5560 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5561 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5562 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5563 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5564 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5565 }
5566 else
5567 {
5568 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5569 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5570 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5571 }
5572
5573 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5574 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5575}
5576
5577
5578/**
5579 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5580 *
5581 * @param pReNative The native recompile state.
5582 * @param idxHstSimdReg The host SIMD register to update the state for.
5583 * @param enmLoadSz The load size to set.
5584 */
5585DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5586 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5587{
5588 /* Everything valid already? -> nothing to do. */
5589 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5590 return;
5591
5592 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5593 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5594 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5595 {
5596 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5597 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5598 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5599 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5600 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5601 }
5602}
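
/*
 * Illustrative sketch of the merging behaviour (editorial addition, not from
 * the original source): loading the low and high 128-bit halves separately
 * leaves the register marked as holding the full 256 bits.  idxHstSimdReg is
 * an assumption for the example and starts out with enmLoaded set to Invalid.
 */
#if 0 /* usage sketch, not compiled */
iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
/* pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded is now kIemNativeGstSimdRegLdStSz_256. */
#endif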
5603
5604
5605static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5606 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5607{
5608 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5609 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5610 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5611 {
5612# ifdef RT_ARCH_ARM64
5613 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5614 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5615# endif
5616
5617 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5618 {
5619 switch (enmLoadSzDst)
5620 {
5621 case kIemNativeGstSimdRegLdStSz_256:
5622 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5623 break;
5624 case kIemNativeGstSimdRegLdStSz_Low128:
5625 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5626 break;
5627 case kIemNativeGstSimdRegLdStSz_High128:
5628 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5629 break;
5630 default:
5631 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5632 }
5633
5634 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5635 }
5636 }
5637 else
5638 {
5639 /* Complicated stuff where the source is currently missing something, later. */
5640 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5641 }
5642
5643 return off;
5644}
5645
5646
5647/**
5648 * Allocates a temporary host SIMD register for keeping a guest
5649 * SIMD register value.
5650 *
5651 * Since we may already have a register holding the guest register value,
5652 * code will be emitted to do the loading if that's not the case. Code may also
5653 * be emitted if we have to free up a register to satisfy the request.
5654 *
5655 * @returns The host register number; throws VBox status code on failure, so no
5656 * need to check the return value.
5657 * @param pReNative The native recompile state.
5658 * @param poff Pointer to the variable with the code buffer
5659 *                          position. This will be updated if we need to move a
5660 * variable from register to stack in order to satisfy
5661 * the request.
5662 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
5663 * @param enmIntendedUse How the caller will be using the host register.
5664 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5665 * register is okay (default). The ASSUMPTION here is
5666 * that the caller has already flushed all volatile
5667 * registers, so this is only applied if we allocate a
5668 * new register.
5669 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5670 */
5671DECL_HIDDEN_THROW(uint8_t)
5672iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5673 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5674 bool fNoVolatileRegs /*= false*/)
5675{
5676 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5677#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5678 AssertMsg( pReNative->idxCurCall == 0
5679 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5680 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5681 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5682 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5683 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5684 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5685#endif
5686#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5687 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5688#endif
5689 uint32_t const fRegMask = !fNoVolatileRegs
5690 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5691 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5692
5693 /*
5694 * First check if the guest register value is already in a host register.
5695 */
5696 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5697 {
5698 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5699 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5700 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5701 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5702
5703 /* It's not supposed to be allocated... */
5704 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5705 {
5706 /*
5707              * If the register will trash the guest shadow copy, try to find a
5708 * completely unused register we can use instead. If that fails,
5709 * we need to disassociate the host reg from the guest reg.
5710 */
5711 /** @todo would be nice to know if preserving the register is in any way helpful. */
5712             /* If the purpose is calculations, try to duplicate the register value as
5713 we'll be clobbering the shadow. */
5714 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5715 && ( ~pReNative->Core.bmHstSimdRegs
5716 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5717 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5718 {
5719 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5720
5721 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5722
5723 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5724 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5725 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5726 idxSimdReg = idxRegNew;
5727 }
5728 /* If the current register matches the restrictions, go ahead and allocate
5729 it for the caller. */
5730 else if (fRegMask & RT_BIT_32(idxSimdReg))
5731 {
5732 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5733 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5734 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5735 {
5736 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5737 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5738 else
5739 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5740 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5741 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5742 }
5743 else
5744 {
5745 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5746 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5747 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5748 }
5749 }
5750 /* Otherwise, allocate a register that satisfies the caller and transfer
5751 the shadowing if compatible with the intended use. (This basically
5752                means the caller wants a non-volatile register (RSP push/pop scenario).) */
5753 else
5754 {
5755 Assert(fNoVolatileRegs);
5756 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5757 !fNoVolatileRegs
5758 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5759 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5760 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5761 {
5762 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5763                 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5764 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5765 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5766 }
5767 else
5768 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5769 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5770 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5771 idxSimdReg = idxRegNew;
5772 }
5773 }
5774 else
5775 {
5776 /*
5777 * Oops. Shadowed guest register already allocated!
5778 *
5779 * Allocate a new register, copy the value and, if updating, the
5780 * guest shadow copy assignment to the new register.
5781 */
5782 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5783 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5784 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5785 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5786
5787 /** @todo share register for readonly access. */
5788 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5789 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5790
5791 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5792 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5793 else
5794 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5795
5796 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5797 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5798 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5799 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5800 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5801 else
5802 {
5803 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5804 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5805 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5806 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5807 }
5808 idxSimdReg = idxRegNew;
5809 }
5810 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5811
5812#ifdef VBOX_STRICT
5813 /* Strict builds: Check that the value is correct. */
5814 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5815 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5816#endif
5817
5818 return idxSimdReg;
5819 }
5820
5821 /*
5822      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5823 */
5824 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5825
5826 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5827 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5828 else
5829 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5830
5831 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5832 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5833
5834     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5835 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5836
5837 return idxRegNew;
5838}
5839
5840#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5841
5842
5843
5844/*********************************************************************************************************************************
5845* Code emitters for flushing pending guest register writes and sanity checks *
5846*********************************************************************************************************************************/
5847
5848#ifdef VBOX_STRICT
5849/**
5850 * Does internal register allocator sanity checks.
5851 */
5852DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5853{
5854 /*
5855 * Iterate host registers building a guest shadowing set.
5856 */
5857 uint64_t bmGstRegShadows = 0;
5858 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5859 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5860 while (bmHstRegsWithGstShadow)
5861 {
5862 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5863 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5864 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5865
5866 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5867 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5868 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5869 bmGstRegShadows |= fThisGstRegShadows;
5870 while (fThisGstRegShadows)
5871 {
5872 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5873 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5874 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5875 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5876 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5877 }
5878 }
5879 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5880 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5881 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5882
5883 /*
5884 * Now the other way around, checking the guest to host index array.
5885 */
5886 bmHstRegsWithGstShadow = 0;
5887 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5888 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5889 while (bmGstRegShadows)
5890 {
5891 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5892 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5893 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5894
5895 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5896 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5897 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5898 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5899 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5900 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5901 }
5902 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5903 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5904 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5905}
5906#endif /* VBOX_STRICT */
5907
5908
5909/**
5910 * Flushes any delayed guest register writes.
5911 *
5912 * This must be called prior to calling CImpl functions and any helpers that use
5913 * the guest state (like raising exceptions) and such.
5914 *
5915  * So far only delayed RIP updates (IEMNATIVE_WITH_DELAYED_PC_UPDATING) and dirty
5916  * guest SIMD registers (IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) are handled here;
      * general delayed guest register writes have not yet been implemented.
5917 */
5918DECL_HIDDEN_THROW(uint32_t)
5919iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5920{
5921#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
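         /* Write back the accumulated RIP delta unless the caller explicitly excepts the PC. */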
5922     if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5923 off = iemNativeEmitPcWriteback(pReNative, off);
5924#else
5925 RT_NOREF(pReNative, fGstShwExcept);
5926#endif
5927
5928#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5929 /** @todo r=bird: There must be a quicker way to check if anything needs
5930 * doing and then call simd function to do the flushing */
5931 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
5932 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5933 {
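             /* A guest SIMD register can only be dirty while a host register still shadows it. */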
5934 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5935 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5936
5937 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5938 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5939
5940 if ( fFlushShadows
5941 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5942 {
5943 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5944
5945 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5946 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5947 }
5948 }
5949#else
5950 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5951#endif
5952
5953 return off;
5954}
5955
5956
5957#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5958/**
5959  * Emits code to update the guest RIP value by adding the instruction offset accumulated since the last RIP update.
5960 */
5961DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5962{
5963 Assert(pReNative->Core.offPc);
5964# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5965 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5966 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5967# endif
5968
5969# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5970 /* Allocate a temporary PC register. */
5971 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5972
5973 /* Perform the addition and store the result. */
5974 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5975 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5976
5977 /* Free but don't flush the PC register. */
5978 iemNativeRegFreeTmp(pReNative, idxPcReg);
5979# else
5980 /* Compare the shadow with the context value, they should match. */
5981 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5982 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5983# endif
5984
5985 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5986 pReNative->Core.offPc = 0;
5987 pReNative->Core.cInstrPcUpdateSkipped = 0;
5988
5989 return off;
5990}
5991#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5992
5993
5994/*********************************************************************************************************************************
5995* Code Emitters (larger snippets) *
5996*********************************************************************************************************************************/
5997
5998/**
5999 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6000 * extending to 64-bit width.
6001 *
6002 * @returns New code buffer offset on success, UINT32_MAX on failure.
6003  * @param   pReNative   The native recompile state.
6004 * @param off The current code buffer position.
6005 * @param idxHstReg The host register to load the guest register value into.
6006 * @param enmGstReg The guest register to load.
6007 *
6008 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6009 * that is something the caller needs to do if applicable.
6010 */
6011DECL_HIDDEN_THROW(uint32_t)
6012iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6013{
6014 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6015 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6016
6017 switch (g_aGstShadowInfo[enmGstReg].cb)
6018 {
6019 case sizeof(uint64_t):
6020 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6021 case sizeof(uint32_t):
6022 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6023 case sizeof(uint16_t):
6024 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6025#if 0 /* not present in the table. */
6026 case sizeof(uint8_t):
6027 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6028#endif
6029 default:
6030 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6031 }
6032}
6033
6034
6035#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6036/**
6037 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6038 *
6039 * @returns New code buffer offset on success, UINT32_MAX on failure.
6040 * @param pReNative The recompiler state.
6041 * @param off The current code buffer position.
6042 * @param idxHstSimdReg The host register to load the guest register value into.
6043 * @param enmGstSimdReg The guest register to load.
6044 * @param enmLoadSz The load size of the register.
6045 *
6046  * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6047 * that is something the caller needs to do if applicable.
6048 */
6049DECL_HIDDEN_THROW(uint32_t)
6050iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6051 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6052{
6053 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6054
6055 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6056 switch (enmLoadSz)
6057 {
6058 case kIemNativeGstSimdRegLdStSz_256:
6059 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6060 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6061 case kIemNativeGstSimdRegLdStSz_Low128:
6062 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6063 case kIemNativeGstSimdRegLdStSz_High128:
6064 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6065 default:
6066 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6067 }
6068}
6069#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6070
6071#ifdef VBOX_STRICT
6072
6073/**
6074 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6075 *
6076 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6077 * Trashes EFLAGS on AMD64.
6078 */
6079DECL_HIDDEN_THROW(uint32_t)
6080iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6081{
6082# ifdef RT_ARCH_AMD64
6083 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6084
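         /* Strategy: rotate the upper half down into the low 32 bits, test them and trap with int3 if any bit is set, then rotate back to restore the register. */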
6085 /* rol reg64, 32 */
6086 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6087 pbCodeBuf[off++] = 0xc1;
6088 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6089 pbCodeBuf[off++] = 32;
6090
6091 /* test reg32, ffffffffh */
6092 if (idxReg >= 8)
6093 pbCodeBuf[off++] = X86_OP_REX_B;
6094 pbCodeBuf[off++] = 0xf7;
6095 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6096 pbCodeBuf[off++] = 0xff;
6097 pbCodeBuf[off++] = 0xff;
6098 pbCodeBuf[off++] = 0xff;
6099 pbCodeBuf[off++] = 0xff;
6100
6101 /* je/jz +1 */
6102 pbCodeBuf[off++] = 0x74;
6103 pbCodeBuf[off++] = 0x01;
6104
6105 /* int3 */
6106 pbCodeBuf[off++] = 0xcc;
6107
6108 /* rol reg64, 32 */
6109 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6110 pbCodeBuf[off++] = 0xc1;
6111 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6112 pbCodeBuf[off++] = 32;
6113
6114# elif defined(RT_ARCH_ARM64)
6115 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6116 /* lsr tmp0, reg64, #32 */
6117 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6118 /* cbz tmp0, +1 */
6119 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6120 /* brk #0x1100 */
6121 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6122
6123# else
6124# error "Port me!"
6125# endif
6126 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6127 return off;
6128}
6129
6130
6131/**
6132 * Emitting code that checks that the content of register @a idxReg is the same
6133 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6134 * instruction if that's not the case.
6135 *
6136 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6137 * Trashes EFLAGS on AMD64.
6138 */
6139DECL_HIDDEN_THROW(uint32_t)
6140iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6141{
6142# ifdef RT_ARCH_AMD64
6143 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6144
6145 /* cmp reg, [mem] */
6146 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6147 {
6148 if (idxReg >= 8)
6149 pbCodeBuf[off++] = X86_OP_REX_R;
6150 pbCodeBuf[off++] = 0x38;
6151 }
6152 else
6153 {
6154 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6155 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6156 else
6157 {
6158 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6159 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6160 else
6161 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6162 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6163 if (idxReg >= 8)
6164 pbCodeBuf[off++] = X86_OP_REX_R;
6165 }
6166 pbCodeBuf[off++] = 0x39;
6167 }
6168 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6169
6170 /* je/jz +1 */
6171 pbCodeBuf[off++] = 0x74;
6172 pbCodeBuf[off++] = 0x01;
6173
6174 /* int3 */
6175 pbCodeBuf[off++] = 0xcc;
6176
6177 /* For values smaller than the register size, we must check that the rest
6178 of the register is all zeros. */
6179 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6180 {
6181 /* test reg64, imm32 */
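             /* The imm32 is sign-extended to 64 bits, so it tests every bit above the guest field width: 0xffffff00 for 8-bit fields and 0xffff0000 for 16-bit ones. */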
6182 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6183 pbCodeBuf[off++] = 0xf7;
6184 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6185 pbCodeBuf[off++] = 0;
6186 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6187 pbCodeBuf[off++] = 0xff;
6188 pbCodeBuf[off++] = 0xff;
6189
6190 /* je/jz +1 */
6191 pbCodeBuf[off++] = 0x74;
6192 pbCodeBuf[off++] = 0x01;
6193
6194 /* int3 */
6195 pbCodeBuf[off++] = 0xcc;
6196 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6197 }
6198 else
6199 {
6200 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6201 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6202             off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6203 }
6204
6205# elif defined(RT_ARCH_ARM64)
6206 /* mov TMP0, [gstreg] */
6207 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6208
6209 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6210 /* sub tmp0, tmp0, idxReg */
6211 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6212 /* cbz tmp0, +1 */
6213 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6214 /* brk #0x1000+enmGstReg */
6215 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6216 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6217
6218# else
6219# error "Port me!"
6220# endif
6221 return off;
6222}
6223
6224
6225# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6226# ifdef RT_ARCH_AMD64
6227/**
6228 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6229 */
6230DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6231{
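         /* Strategy: pcmpeqq sets each of the two qwords to all ones when it equals the CPUMCTX copy, so extract both qwords and trap with int3 unless each compares equal to -1. */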
6232 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6233 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6234 if (idxSimdReg >= 8)
6235 pbCodeBuf[off++] = X86_OP_REX_R;
6236 pbCodeBuf[off++] = 0x0f;
6237 pbCodeBuf[off++] = 0x38;
6238 pbCodeBuf[off++] = 0x29;
6239 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6240
6241 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6242 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6243 pbCodeBuf[off++] = X86_OP_REX_W
6244 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6245 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6246 pbCodeBuf[off++] = 0x0f;
6247 pbCodeBuf[off++] = 0x3a;
6248 pbCodeBuf[off++] = 0x16;
6249 pbCodeBuf[off++] = 0xeb;
6250 pbCodeBuf[off++] = 0x00;
6251
6252 /* cmp tmp0, 0xffffffffffffffff. */
6253 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6254 pbCodeBuf[off++] = 0x83;
6255 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6256 pbCodeBuf[off++] = 0xff;
6257
6258 /* je/jz +1 */
6259 pbCodeBuf[off++] = 0x74;
6260 pbCodeBuf[off++] = 0x01;
6261
6262 /* int3 */
6263 pbCodeBuf[off++] = 0xcc;
6264
6265 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6266 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6267 pbCodeBuf[off++] = X86_OP_REX_W
6268 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6269 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6270 pbCodeBuf[off++] = 0x0f;
6271 pbCodeBuf[off++] = 0x3a;
6272 pbCodeBuf[off++] = 0x16;
6273 pbCodeBuf[off++] = 0xeb;
6274 pbCodeBuf[off++] = 0x01;
6275
6276 /* cmp tmp0, 0xffffffffffffffff. */
6277 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6278 pbCodeBuf[off++] = 0x83;
6279 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6280 pbCodeBuf[off++] = 0xff;
6281
6282 /* je/jz +1 */
6283 pbCodeBuf[off++] = 0x74;
6284 pbCodeBuf[off++] = 0x01;
6285
6286 /* int3 */
6287 pbCodeBuf[off++] = 0xcc;
6288
6289 return off;
6290}
6291# endif
6292
6293
6294/**
6295 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6296 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6297 * instruction if that's not the case.
6298 *
6299 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6300 * Trashes EFLAGS on AMD64.
6301 */
6302DECL_HIDDEN_THROW(uint32_t)
6303iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6304 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6305{
6306     /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6307 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6308 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6309 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6310 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6311 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6312 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6313 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6314 return off;
6315
6316# ifdef RT_ARCH_AMD64
6317 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6318 {
6319 /* movdqa vectmp0, idxSimdReg */
6320 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6321
6322 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6323
6324 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6325 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6326 }
6327
6328 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6329 {
6330         /* CPUMCTX stores the high 128 bits separately, so we need to do this all over again for the high part. */
6331 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6332
6333 /* vextracti128 vectmp0, idxSimdReg, 1 */
6334 pbCodeBuf[off++] = X86_OP_VEX3;
6335 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6336 | X86_OP_VEX3_BYTE1_X
6337 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6338 | 0x03; /* Opcode map */
6339 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6340 pbCodeBuf[off++] = 0x39;
6341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6342 pbCodeBuf[off++] = 0x01;
6343
6344 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6345 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6346 }
6347# elif defined(RT_ARCH_ARM64)
6348 /* mov vectmp0, [gstreg] */
6349 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6350
6351 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6352 {
6353 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6354 /* eor vectmp0, vectmp0, idxSimdReg */
6355 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6356         /* cnt vectmp0.16b, vectmp0.16b */
6357 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6358 /* umov tmp0, vectmp0.D[0] */
6359 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6360 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6361 /* cbz tmp0, +1 */
6362 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6363         /* brk #0x1000+enmGstSimdReg */
6364 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6365 }
6366
6367 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6368 {
6369 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6370 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6371 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6372         /* cnt (vectmp0 + 1).16b, (vectmp0 + 1).16b */
6373 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6374 /* umov tmp0, (vectmp0 + 1).D[0] */
6375 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6376 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6377 /* cbz tmp0, +1 */
6378 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6379         /* brk #0x1000+enmGstSimdReg */
6380 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6381 }
6382
6383# else
6384# error "Port me!"
6385# endif
6386
6387 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6388 return off;
6389}
6390# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6391
6392
6393/**
6394 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6395 * important bits.
6396 *
6397 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6398 * Trashes EFLAGS on AMD64.
6399 */
6400DECL_HIDDEN_THROW(uint32_t)
6401iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6402{
6403 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
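         /* Load IEMCPU::fExec, mask it down to the bits covered by the TB key and compare it with the expected value. */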
6404 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6405 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6406 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6407
6408#ifdef RT_ARCH_AMD64
6409 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6410
6411 /* je/jz +1 */
6412 pbCodeBuf[off++] = 0x74;
6413 pbCodeBuf[off++] = 0x01;
6414
6415 /* int3 */
6416 pbCodeBuf[off++] = 0xcc;
6417
6418# elif defined(RT_ARCH_ARM64)
6419 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6420
6421 /* b.eq +1 */
6422 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6423 /* brk #0x2000 */
6424 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6425
6426# else
6427# error "Port me!"
6428# endif
6429 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6430
6431 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6432 return off;
6433}
6434
6435#endif /* VBOX_STRICT */
6436
6437
6438#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6439/**
6440 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6441 */
6442DECL_HIDDEN_THROW(uint32_t)
6443iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6444{
6445 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6446
6447 fEflNeeded &= X86_EFL_STATUS_BITS;
6448 if (fEflNeeded)
6449 {
6450# ifdef RT_ARCH_AMD64
6451 /* test dword [pVCpu + offVCpu], imm32 */
6452 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
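             /* Use the short 'test byte' form (f6 /0 ib) when all needed status flags fit into the low byte, otherwise the full 'test dword' form (f7 /0 id). */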
6453 if (fEflNeeded <= 0xff)
6454 {
6455 pCodeBuf[off++] = 0xf6;
6456 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6457 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6458 }
6459 else
6460 {
6461 pCodeBuf[off++] = 0xf7;
6462 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6463 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6464 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6465 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6466 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6467 }
6468 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6469
6470# else
6471 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6472 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6473 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6474# ifdef RT_ARCH_ARM64
6475 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6476 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6477# else
6478# error "Port me!"
6479# endif
6480 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6481# endif
6482 }
6483 return off;
6484}
6485#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6486
6487
6488/**
6489  * Emits code for checking the return code of a call and rcPassUp, returning
6490  * from the code if either is non-zero.
6491 */
6492DECL_HIDDEN_THROW(uint32_t)
6493iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6494{
6495#ifdef RT_ARCH_AMD64
6496 /*
6497 * AMD64: eax = call status code.
6498 */
6499
6500 /* edx = rcPassUp */
6501 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6502# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6503 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6504# endif
6505
6506 /* edx = eax | rcPassUp */
6507 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6508 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6509 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6510 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6511
6512 /* Jump to non-zero status return path. */
6513 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6514
6515 /* done. */
6516
6517#elif RT_ARCH_ARM64
6518 /*
6519 * ARM64: w0 = call status code.
6520 */
6521# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6522 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6523# endif
6524 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6525
6526 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6527
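         /* orr w4, w3, w0 ; w4 = rcPassUp | call status code */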
6528 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6529
6530 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6531 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
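         /* cbnz w4, <NonZeroRetOrPassUp> ; the branch target is filled in via the fixup recorded above. */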
6532 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6533
6534#else
6535# error "port me"
6536#endif
6537 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6538 RT_NOREF_PV(idxInstr);
6539 return off;
6540}
6541
6542
6543/**
6544 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6545 * raising a \#GP(0) if it isn't.
6546 *
6547 * @returns New code buffer offset, UINT32_MAX on failure.
6548 * @param pReNative The native recompile state.
6549 * @param off The code buffer offset.
6550 * @param idxAddrReg The host register with the address to check.
6551 * @param idxInstr The current instruction.
6552 */
6553DECL_HIDDEN_THROW(uint32_t)
6554iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6555{
6556 /*
6557 * Make sure we don't have any outstanding guest register writes as we may
6558      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6559 */
6560 off = iemNativeRegFlushPendingWrites(pReNative, off);
6561
6562#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6563 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6564#else
6565 RT_NOREF(idxInstr);
6566#endif
6567
6568#ifdef RT_ARCH_AMD64
6569 /*
6570 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6571 * return raisexcpt();
6572      * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
6573 */
6574 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6575
6576 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6577 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6578 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6579 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6580 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6581
6582 iemNativeRegFreeTmp(pReNative, iTmpReg);
6583
6584#elif defined(RT_ARCH_ARM64)
6585 /*
6586 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6587 * return raisexcpt();
6588 * ----
6589 * mov x1, 0x800000000000
6590 * add x1, x0, x1
6591 * cmp xzr, x1, lsr 48
6592 * b.ne .Lraisexcpt
6593 */
6594 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6595
6596 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6597 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6598 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6599 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6600
6601 iemNativeRegFreeTmp(pReNative, iTmpReg);
6602
6603#else
6604# error "Port me"
6605#endif
6606 return off;
6607}
6608
6609
6610/**
6611  * Emits code to check that the content of @a idxAddrReg is within the limit
6612 * of CS, raising a \#GP(0) if it isn't.
6613 *
6614 * @returns New code buffer offset; throws VBox status code on error.
6615 * @param pReNative The native recompile state.
6616 * @param off The code buffer offset.
6617 * @param idxAddrReg The host register (32-bit) with the address to
6618 * check.
6619 * @param idxInstr The current instruction.
6620 */
6621DECL_HIDDEN_THROW(uint32_t)
6622iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6623 uint8_t idxAddrReg, uint8_t idxInstr)
6624{
6625 /*
6626 * Make sure we don't have any outstanding guest register writes as we may
6627      * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6628 */
6629 off = iemNativeRegFlushPendingWrites(pReNative, off);
6630
6631#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6632 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6633#else
6634 RT_NOREF(idxInstr);
6635#endif
6636
6637 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6638 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6639 kIemNativeGstRegUse_ReadOnly);
6640
6641 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6642 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6643
6644 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6645 return off;
6646}
6647
6648
6649/**
6650 * Emits a call to a CImpl function or something similar.
6651 */
6652DECL_HIDDEN_THROW(uint32_t)
6653iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6654 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6655{
6656 /* Writeback everything. */
6657 off = iemNativeRegFlushPendingWrites(pReNative, off);
6658
6659 /*
6660      * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6661 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6662 */
6663 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6664 fGstShwFlush
6665 | RT_BIT_64(kIemNativeGstReg_Pc)
6666 | RT_BIT_64(kIemNativeGstReg_EFlags));
6667 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6668
6669 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6670
6671 /*
6672 * Load the parameters.
6673 */
6674#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6675     /* Special case: the hidden VBOXSTRICTRC return value pointer takes the first argument register, shifting the rest. */
6676 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6677 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6678 if (cAddParams > 0)
6679 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
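         /* Only the first additional parameter fits in a register here; the hidden VBOXSTRICTRC pointer, pVCpu and cbInstr use up the rest, so the remaining parameters go onto the stack. */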
6680 if (cAddParams > 1)
6681 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6682 if (cAddParams > 2)
6683 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6684 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6685
6686#else
6687 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6688 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6689 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6690 if (cAddParams > 0)
6691 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6692 if (cAddParams > 1)
6693 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6694 if (cAddParams > 2)
6695# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6696 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6697# else
6698 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6699# endif
6700#endif
6701
6702 /*
6703 * Make the call.
6704 */
6705 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6706
6707#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6708 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6709#endif
6710
6711 /*
6712 * Check the status code.
6713 */
6714 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6715}
6716
6717
6718/**
6719 * Emits a call to a threaded worker function.
6720 */
6721DECL_HIDDEN_THROW(uint32_t)
6722iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6723{
6724 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6725
6726 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6727 off = iemNativeRegFlushPendingWrites(pReNative, off);
6728
6729 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6730 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6731
6732#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6733 /* The threaded function may throw / long jmp, so set current instruction
6734 number if we're counting. */
6735 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6736#endif
6737
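         /* Number of parameters the threaded function takes, not counting the pVCpu argument (at most three). */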
6738 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6739
6740#ifdef RT_ARCH_AMD64
6741 /* Load the parameters and emit the call. */
6742# ifdef RT_OS_WINDOWS
6743# ifndef VBOXSTRICTRC_STRICT_ENABLED
6744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6745 if (cParams > 0)
6746 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6747 if (cParams > 1)
6748 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6749 if (cParams > 2)
6750 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6751# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6752 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6753 if (cParams > 0)
6754 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6755 if (cParams > 1)
6756 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6757 if (cParams > 2)
6758 {
6759 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6760 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6761 }
6762 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6763# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6764# else
6765 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6766 if (cParams > 0)
6767 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6768 if (cParams > 1)
6769 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6770 if (cParams > 2)
6771 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6772# endif
6773
6774 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6775
6776# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6777 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6778# endif
6779
6780#elif RT_ARCH_ARM64
6781 /*
6782 * ARM64:
6783 */
6784 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6785 if (cParams > 0)
6786 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6787 if (cParams > 1)
6788 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6789 if (cParams > 2)
6790 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6791
6792 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6793
6794#else
6795# error "port me"
6796#endif
6797
6798 /*
6799 * Check the status code.
6800 */
6801 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6802
6803 return off;
6804}
6805
6806#ifdef VBOX_WITH_STATISTICS
6807/**
6808  * Emits code to update the threaded function call statistics.
6809 */
6810DECL_INLINE_THROW(uint32_t)
6811iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6812{
6813 /*
6814 * Update threaded function stats.
6815 */
6816 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6817 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6818# if defined(RT_ARCH_ARM64)
6819 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6820 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6821 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6822 iemNativeRegFreeTmp(pReNative, idxTmp1);
6823 iemNativeRegFreeTmp(pReNative, idxTmp2);
6824# else
6825 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6826# endif
6827 return off;
6828}
6829#endif /* VBOX_WITH_STATISTICS */
6830
6831
6832/**
6833 * Emits the code at the ReturnWithFlags label (returns
6834 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6835 */
6836static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6837{
6838 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6839 if (idxLabel != UINT32_MAX)
6840 {
6841 iemNativeLabelDefine(pReNative, idxLabel, off);
6842
6843 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6844
6845 /* jump back to the return sequence. */
6846 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6847 }
6848 return off;
6849}
6850
6851
6852/**
6853 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6854 */
6855static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6856{
6857 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6858 if (idxLabel != UINT32_MAX)
6859 {
6860 iemNativeLabelDefine(pReNative, idxLabel, off);
6861
6862 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6863
6864 /* jump back to the return sequence. */
6865 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6866 }
6867 return off;
6868}
6869
6870
6871/**
6872 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6873 */
6874static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6875{
6876 /*
6877 * Generate the rc + rcPassUp fiddling code if needed.
6878 */
6879 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6880 if (idxLabel != UINT32_MAX)
6881 {
6882 iemNativeLabelDefine(pReNative, idxLabel, off);
6883
6884 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6885#ifdef RT_ARCH_AMD64
6886# ifdef RT_OS_WINDOWS
6887# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6888 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6889# endif
6890 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6891 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6892# else
6893 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6894 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6895# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6896 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6897# endif
6898# endif
6899# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6900 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6901# endif
6902
6903#else
6904 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6905 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6906 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6907#endif
6908
6909 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6910 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6911 }
6912 return off;
6913}
6914
6915
6916/**
6917 * Emits a standard epilog.
6918 */
6919static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6920{
6921 *pidxReturnLabel = UINT32_MAX;
6922
6923 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6924 off = iemNativeRegFlushPendingWrites(pReNative, off);
6925
6926 /*
6927 * Successful return, so clear the return register (eax, w0).
6928 */
6929     off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6930
6931 /*
6932 * Define label for common return point.
6933 */
6934 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6935 *pidxReturnLabel = idxReturn;
6936
6937 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6938
6939 /*
6940 * Restore registers and return.
6941 */
6942#ifdef RT_ARCH_AMD64
6943 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6944
6945     /* Reposition rsp at the r15 restore point. */
6946 pbCodeBuf[off++] = X86_OP_REX_W;
6947 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6948 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6949 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6950
6951 /* Pop non-volatile registers and return */
6952 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6953 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6954 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6955 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6956 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6957 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6958 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6959 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6960# ifdef RT_OS_WINDOWS
6961 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6962 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6963# endif
6964 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6965 pbCodeBuf[off++] = 0xc9; /* leave */
6966 pbCodeBuf[off++] = 0xc3; /* ret */
6967 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6968
6969#elif RT_ARCH_ARM64
6970 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6971
6972 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6973 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6974 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6975 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6976 IEMNATIVE_FRAME_VAR_SIZE / 8);
6977 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6978 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6979 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6980 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6981 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6982 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6983 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6984 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6985 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6986 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6987 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6988 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6989
6990 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6991 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6992 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6993 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6994
6995 /* retab / ret */
6996# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6997 if (1)
6998 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6999 else
7000# endif
7001 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7002
7003#else
7004# error "port me"
7005#endif
7006 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7007
7008 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7009}
7010
7011
7012/**
7013 * Emits a standard prolog.
7014 */
7015static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7016{
7017#ifdef RT_ARCH_AMD64
7018 /*
7019 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7020 * reserving 64 bytes for stack variables plus 4 non-register argument
7021  * slots.  Fixed register assignment: xBX = pVCpu;
7022 *
7023 * Since we always do the same register spilling, we can use the same
7024 * unwind description for all the code.
7025 */
7026 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7027 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7028 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7029 pbCodeBuf[off++] = 0x8b;
7030 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7031 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7032 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7033# ifdef RT_OS_WINDOWS
7034 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7035 pbCodeBuf[off++] = 0x8b;
7036 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7037 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7038 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7039# else
7040 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7041 pbCodeBuf[off++] = 0x8b;
7042 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7043# endif
7044 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7045 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7046 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7047 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7048 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7049 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7050 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7051 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7052
7053# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7054 /* Save the frame pointer. */
7055 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7056# endif
7057
7058 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7059 X86_GREG_xSP,
7060 IEMNATIVE_FRAME_ALIGN_SIZE
7061 + IEMNATIVE_FRAME_VAR_SIZE
7062 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7063 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7064 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7065 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7066 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7067
7068#elif RT_ARCH_ARM64
7069 /*
7070 * We set up a stack frame exactly like on x86, only we have to push the
7071 * return address ourselves here. We save all non-volatile registers.
7072 */
7073 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7074
7075 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've
7076 * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind.
7077 * It's definitely the dwarf stepping code, but until that is found it's very tedious to figure out
7078 * whether it's in any way conditional, so we just emit this instruction now and hope for the best... */
7079 /* pacibsp */
7080 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7081# endif
7082
7083 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7084 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7085 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7086 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7087 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7088 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7089 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7090 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7091 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7092 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7093 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7094 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7095 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7096 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7097 /* Save the BP and LR (ret address) registers at the top of the frame. */
7098 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7099 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7100 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7101 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7102 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7103 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7104
7105 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7106 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7107
7108 /* mov x28, x0 ; pVCpu */
7109 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7110 /* mov x27, x1 ; pCpumCtx */
7111 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7112
7113# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7114 /* Save the frame pointer. */
7115 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7116 ARMV8_A64_REG_X2);
7117# endif
7118
7119#else
7120# error "port me"
7121#endif
7122 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7123 return off;
7124}
7125
7126
7127/*********************************************************************************************************************************
7128* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7129*********************************************************************************************************************************/
7130
7131/**
7132 * Internal work that allocates a variable with kind set to
7133 * kIemNativeVarKind_Invalid and no current stack allocation.
7134 *
7135 * The kind will either be set by the caller or later when the variable is first
7136 * assigned a value.
7137 *
7138 * @returns Unpacked index.
7139 * @internal
7140 */
7141static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7142{
7143 Assert(cbType > 0 && cbType <= 64);
7144 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7145 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7146 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7147 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7148 pReNative->Core.aVars[idxVar].cbVar = cbType;
7149 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7150 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7151 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7152 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7153 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7154 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7155 pReNative->Core.aVars[idxVar].u.uValue = 0;
7156#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7157 pReNative->Core.aVars[idxVar].fSimdReg = false;
7158#endif
7159 return idxVar;
7160}
7161
7162
7163/**
7164 * Internal work that allocates an argument variable w/o setting enmKind.
7165 *
7166 * @returns Unpacked index.
7167 * @internal
7168 */
7169static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7170{
7171 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7172 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7173 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7174
7175 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7176 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7177 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7178 return idxVar;
7179}
7180
7181
7182/**
7183 * Gets the stack slot for a stack variable, allocating one if necessary.
7184 *
7185 * Calling this function implies that the stack slot will contain a valid
7186 * variable value. The caller deals with any register currently assigned to the
7187 * variable, typically by spilling it into the stack slot.
7188 *
7189 * @returns The stack slot number.
7190 * @param pReNative The recompiler state.
7191 * @param idxVar The variable.
7192 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7193 */
7194DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7195{
7196 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7197 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7198 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7199
7200 /* Already got a slot? */
7201 uint8_t const idxStackSlot = pVar->idxStackSlot;
7202 if (idxStackSlot != UINT8_MAX)
7203 {
7204 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7205 return idxStackSlot;
7206 }
7207
7208 /*
7209 * A single slot is easy to allocate.
7210 * Allocate them from the top end, closest to BP, to reduce the displacement.
7211 */
7212 if (pVar->cbVar <= sizeof(uint64_t))
7213 {
7214 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7215 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7216 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7217 pVar->idxStackSlot = (uint8_t)iSlot;
7218 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7219 return (uint8_t)iSlot;
7220 }
7221
7222 /*
7223 * We need more than one stack slot.
7224 *
7225 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7226 */
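/* Editor's illustration: for a 16 byte variable ASMBitLastSetU32(16) is 5, so
   fBitAlignMask = RT_BIT_32(1) - 1 = 1 (even slot index) and fBitAllocMask =
   RT_BIT_32(2) - 1 = 0x3 (two slots); for 32 bytes the masks are 3 and 0xf.
   The loop below scans downwards from the highest free slot, rounds the
   candidate down to the alignment and takes it if the whole window is free. */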
7227 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7228 Assert(pVar->cbVar <= 64);
7229 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7230 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7231 uint32_t bmStack = pReNative->Core.bmStack;
7232 while (bmStack != UINT32_MAX)
7233 {
7234 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7235 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7236 iSlot = (iSlot - 1) & ~fBitAlignMask;
7237 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7238 {
7239 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7240 pVar->idxStackSlot = (uint8_t)iSlot;
7241 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7242 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7243 return (uint8_t)iSlot;
7244 }
7245
7246 bmStack |= (fBitAllocMask << iSlot);
7247 }
7248 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7249}
7250
7251
7252/**
7253 * Changes the variable to a stack variable.
7254 *
7255 * Currently this is only possible to do the first time the variable is used;
7256 * switching later can be implemented but hasn't been done.
7257 *
7258 * @param pReNative The recompiler state.
7259 * @param idxVar The variable.
7260 * @throws VERR_IEM_VAR_IPE_2
7261 */
7262DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7263{
7264 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7265 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7266 if (pVar->enmKind != kIemNativeVarKind_Stack)
7267 {
7268 /* We could in theory transition from immediate to stack as well, but it
7269 would involve the caller doing work storing the value on the stack. So,
7270 till that's required we only allow transition from invalid. */
7271 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7272 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7273 pVar->enmKind = kIemNativeVarKind_Stack;
7274
7275 /* Note! We don't allocate a stack slot here, that's only done when a
7276 slot is actually needed to hold a variable value. */
7277 }
7278}
7279
7280
7281/**
7282 * Sets it to a variable with a constant value.
7283 *
7284 * This does not require stack storage as we know the value and can always
7285 * reload it, unless of course it's referenced.
7286 *
7287 * @param pReNative The recompiler state.
7288 * @param idxVar The variable.
7289 * @param uValue The immediate value.
7290 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7291 */
7292DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7293{
7294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7295 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7296 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7297 {
7298 /* Only simple transitions for now. */
7299 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7300 pVar->enmKind = kIemNativeVarKind_Immediate;
7301 }
7302 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7303
7304 pVar->u.uValue = uValue;
7305 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7306 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7307 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7308}
7309
7310
7311/**
7312 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7313 *
7314 * This does not require stack storage as we know the value and can always
7315 * reload it. Loading is postponed till needed.
7316 *
7317 * @param pReNative The recompiler state.
7318 * @param idxVar The variable. Unpacked.
7319 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7320 *
7321 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7322 * @internal
7323 */
7324static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7325{
7326 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7327 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7328
7329 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7330 {
7331 /* Only simple transitions for now. */
7332 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7333 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7334 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7335 }
7336 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7337
7338 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7339
7340 /* Update the other variable, ensure it's a stack variable. */
7341 /** @todo handle variables with const values... that'll go boom now. */
7342 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7343 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7344}
7345
7346
7347/**
7348 * Sets the variable to a reference (pointer) to a guest register reference.
7349 *
7350 * This does not require stack storage as we know the value and can always
7351 * reload it. Loading is postponed till needed.
7352 *
7353 * @param pReNative The recompiler state.
7354 * @param idxVar The variable.
7355 * @param enmRegClass The class guest registers to reference.
7356 * @param idxReg The register within @a enmRegClass to reference.
7357 *
7358 * @throws VERR_IEM_VAR_IPE_2
7359 */
7360DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7361 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7362{
7363 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7364 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7365
7366 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7367 {
7368 /* Only simple transitions for now. */
7369 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7370 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7371 }
7372 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7373
7374 pVar->u.GstRegRef.enmClass = enmRegClass;
7375 pVar->u.GstRegRef.idx = idxReg;
7376}
7377
7378
7379DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7380{
7381 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7382}
7383
7384
7385DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7386{
7387 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7388
7389 /* Since we're using a generic uint64_t value type, we must truncate it if
7390 the variable is smaller, otherwise we may end up with a value that is too
7391 large when scaling up an imm8 w/ sign-extension.
7392
7393 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7394 in the bios, bx=1) when running on arm, because clang expects 16-bit
7395 register parameters to have bits 16 and up set to zero. Instead of
7396 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7397 CF value in the result. */
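/* Editor's illustration: with cbType = 2 and uValue = 0xffffffffffffffff (a
   sign-extended immediate of -1), the switch below masks the value down to
   0xffff before it is recorded as the constant. */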
7398 switch (cbType)
7399 {
7400 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7401 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7402 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7403 }
7404 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7405 return idxVar;
7406}
7407
7408
7409DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7410{
7411 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7412 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7413 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7414 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7415 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7416 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7417
7418 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7419 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7420 return idxArgVar;
7421}
7422
7423
7424DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7425{
7426 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7427 /* Don't set to stack now, leave that to the first use as for instance
7428 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7429 return idxVar;
7430}
7431
7432
7433DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7434{
7435 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7436
7437 /* Since we're using a generic uint64_t value type, we must truncate it if
7438 the variable is smaller, otherwise we may end up with a value that is too
7439 large when scaling up an imm8 w/ sign-extension. */
7440 switch (cbType)
7441 {
7442 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7443 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7444 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7445 }
7446 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7447 return idxVar;
7448}
7449
7450
7451/**
7452 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7453 * fixed till we call iemNativeVarRegisterRelease.
7454 *
7455 * @returns The host register number.
7456 * @param pReNative The recompiler state.
7457 * @param idxVar The variable.
7458 * @param poff Pointer to the instruction buffer offset.
7459 * In case a register needs to be freed up or the value
7460 * loaded off the stack.
7461 * @param fInitialized Set if the variable must already have been initialized.
7462 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7463 * the case.
7464 * @param idxRegPref Preferred register number or UINT8_MAX.
7465 */
7466DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7467 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7468{
7469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7470 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7471 Assert(pVar->cbVar <= 8);
7472 Assert(!pVar->fRegAcquired);
7473
7474 uint8_t idxReg = pVar->idxReg;
7475 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7476 {
7477 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7478 && pVar->enmKind < kIemNativeVarKind_End);
7479 pVar->fRegAcquired = true;
7480 return idxReg;
7481 }
7482
7483 /*
7484 * If the kind of variable has not yet been set, default to 'stack'.
7485 */
7486 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7487 && pVar->enmKind < kIemNativeVarKind_End);
7488 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7489 iemNativeVarSetKindToStack(pReNative, idxVar);
7490
7491 /*
7492 * We have to allocate a register for the variable, even if it's a stack one,
7493 * as we don't know if there are modifications being made to it before it's
7494 * finalized (todo: analyze and insert hints about that?).
7495 *
7496 * If we can, we try to get the correct register for argument variables. This
7497 * assumes that most argument variables are fetched as close as possible to
7498 * the actual call, so that there aren't any interfering hidden calls
7499 * (memory accesses, etc.) in between.
7500 *
7501 * If we cannot, or if it's a local variable, we make sure no argument
7502 * registers that will be used by this MC block are allocated here, and we
7503 * always prefer non-volatile registers to avoid having to spill stuff for
7504 * internal calls.
7505 */
7506 /** @todo Detect too early argument value fetches and warn about hidden
7507 * calls causing less optimal code to be generated in the python script. */
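/* Editor's summary of the selection order implemented below: (1) if the variable
   is an argument and its designated call register is free, take that register;
   (2) otherwise, if no usable preferred register was given, pick a free register
   that isn't one of this call's argument registers, preferring the non-volatile
   ones at the top and falling back to iemNativeRegAllocFindFree; (3) otherwise
   take the caller's preferred register. */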
7508
7509 uint8_t const uArgNo = pVar->uArgNo;
7510 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7511 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7512 {
7513 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7514 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7515 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7516 }
7517 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7518 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7519 {
7520 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7521 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7522 & ~pReNative->Core.bmHstRegsWithGstShadow
7523 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7524 & fNotArgsMask;
7525 if (fRegs)
7526 {
7527 /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
7528 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7529 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7530 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7531 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7532 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7533 }
7534 else
7535 {
7536 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7537 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7538 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7539 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7540 }
7541 }
7542 else
7543 {
7544 idxReg = idxRegPref;
7545 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7546 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7547 }
7548 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7549 pVar->idxReg = idxReg;
7550
7551#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7552 pVar->fSimdReg = false;
7553#endif
7554
7555 /*
7556 * Load it off the stack if we've got a stack slot.
7557 */
7558 uint8_t const idxStackSlot = pVar->idxStackSlot;
7559 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7560 {
7561 Assert(fInitialized);
7562 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7563 switch (pVar->cbVar)
7564 {
7565 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7566 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7567 case 3: AssertFailed(); RT_FALL_THRU();
7568 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7569 default: AssertFailed(); RT_FALL_THRU();
7570 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7571 }
7572 }
7573 else
7574 {
7575 Assert(idxStackSlot == UINT8_MAX);
7576 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7577 }
7578 pVar->fRegAcquired = true;
7579 return idxReg;
7580}
7581
7582
7583#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7584/**
7585 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7586 * fixed till we call iemNativeVarRegisterRelease.
7587 *
7588 * @returns The host register number.
7589 * @param pReNative The recompiler state.
7590 * @param idxVar The variable.
7591 * @param poff Pointer to the instruction buffer offset.
7592 * In case a register needs to be freed up or the value
7593 * loaded off the stack.
7594 * @param fInitialized Set if the variable must already have been initialized.
7595 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7596 * the case.
7597 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7598 */
7599DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7600 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7601{
7602 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7603 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7604 Assert( pVar->cbVar == sizeof(RTUINT128U)
7605 || pVar->cbVar == sizeof(RTUINT256U));
7606 Assert(!pVar->fRegAcquired);
7607
7608 uint8_t idxReg = pVar->idxReg;
7609 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7610 {
7611 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7612 && pVar->enmKind < kIemNativeVarKind_End);
7613 pVar->fRegAcquired = true;
7614 return idxReg;
7615 }
7616
7617 /*
7618 * If the kind of variable has not yet been set, default to 'stack'.
7619 */
7620 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7621 && pVar->enmKind < kIemNativeVarKind_End);
7622 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7623 iemNativeVarSetKindToStack(pReNative, idxVar);
7624
7625 /*
7626 * We have to allocate a register for the variable, even if it's a stack one,
7627 * as we don't know if there are modifications being made to it before it's
7628 * finalized (todo: analyze and insert hints about that?).
7629 *
7630 * If we can, we try to get the correct register for argument variables. This
7631 * assumes that most argument variables are fetched as close as possible to
7632 * the actual call, so that there aren't any interfering hidden calls
7633 * (memory accesses, etc.) in between.
7634 *
7635 * If we cannot, or if it's a local variable, we make sure no argument
7636 * registers that will be used by this MC block are allocated here, and we
7637 * always prefer non-volatile registers to avoid having to spill stuff for
7638 * internal calls.
7639 */
7640 /** @todo Detect too early argument value fetches and warn about hidden
7641 * calls causing less optimal code to be generated in the python script. */
7642
7643 uint8_t const uArgNo = pVar->uArgNo;
7644 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7645
7646 /* SIMD is a bit simpler for now because there is no support for arguments. */
7647 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7648 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7649 {
7650 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7651 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7652 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7653 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7654 & fNotArgsMask;
7655 if (fRegs)
7656 {
7657 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7658 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7659 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7660 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7661 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7662 }
7663 else
7664 {
7665 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7666 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7667 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7668 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7669 }
7670 }
7671 else
7672 {
7673 idxReg = idxRegPref;
7674 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7675 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7676 }
7677 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7678
7679 pVar->fSimdReg = true;
7680 pVar->idxReg = idxReg;
7681
7682 /*
7683 * Load it off the stack if we've got a stack slot.
7684 */
7685 uint8_t const idxStackSlot = pVar->idxStackSlot;
7686 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7687 {
7688 Assert(fInitialized);
7689 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7690 switch (pVar->cbVar)
7691 {
7692 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7693 default: AssertFailed(); RT_FALL_THRU();
7694 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7695 }
7696 }
7697 else
7698 {
7699 Assert(idxStackSlot == UINT8_MAX);
7700 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7701 }
7702 pVar->fRegAcquired = true;
7703 return idxReg;
7704}
7705#endif
7706
7707
7708/**
7709 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7710 * guest register.
7711 *
7712 * This function makes sure there is a register for it and sets it to be the
7713 * current shadow copy of @a enmGstReg.
7714 *
7715 * @returns The host register number.
7716 * @param pReNative The recompiler state.
7717 * @param idxVar The variable.
7718 * @param enmGstReg The guest register this variable will be written to
7719 * after this call.
7720 * @param poff Pointer to the instruction buffer offset.
7721 * In case a register needs to be freed up or if the
7722 * variable content needs to be loaded off the stack.
7723 *
7724 * @note We DO NOT expect @a idxVar to be an argument variable, because this
7725 * function is only used in the commit stage of an instruction.
7727 */
7728DECL_HIDDEN_THROW(uint8_t)
7729iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7730{
7731 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7732 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7733 Assert(!pVar->fRegAcquired);
7734 AssertMsgStmt( pVar->cbVar <= 8
7735 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7736 || pVar->enmKind == kIemNativeVarKind_Stack),
7737 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7738 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7739 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7740
7741 /*
7742 * This shouldn't ever be used for arguments, unless it's in a weird else
7743 * branch that doesn't do any calling and even then it's questionable.
7744 *
7745 * However, in case someone writes crazy wrong MC code and does register
7746 * updates before making calls, just use the regular register allocator to
7747 * ensure we get a register suitable for the intended argument number.
7748 */
7749 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7750
7751 /*
7752 * If there is already a register for the variable, we transfer/set the
7753 * guest shadow copy assignment to it.
7754 */
7755 uint8_t idxReg = pVar->idxReg;
7756 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7757 {
7758 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7759 {
7760 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7761 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7762 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7763 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7764 }
7765 else
7766 {
7767 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7768 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7769 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7770 }
7771 /** @todo figure this one out. We need some way of making sure the register isn't
7772 * modified after this point, just in case we start writing crappy MC code. */
7773 pVar->enmGstReg = enmGstReg;
7774 pVar->fRegAcquired = true;
7775 return idxReg;
7776 }
7777 Assert(pVar->uArgNo == UINT8_MAX);
7778
7779 /*
7780 * Because this is supposed to be the commit stage, we just tag along with the
7781 * temporary register allocator and upgrade it to a variable register.
7782 */
7783 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7784 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7785 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7786 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7787 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7788 pVar->idxReg = idxReg;
7789
7790 /*
7791 * Now we need to load the register value.
7792 */
7793 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7794 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7795 else
7796 {
7797 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7798 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7799 switch (pVar->cbVar)
7800 {
7801 case sizeof(uint64_t):
7802 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7803 break;
7804 case sizeof(uint32_t):
7805 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7806 break;
7807 case sizeof(uint16_t):
7808 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7809 break;
7810 case sizeof(uint8_t):
7811 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7812 break;
7813 default:
7814 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7815 }
7816 }
7817
7818 pVar->fRegAcquired = true;
7819 return idxReg;
7820}
7821
7822
7823/**
7824 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7825 *
7826 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7827 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7828 * requirement of flushing anything in volatile host registers when making a
7829 * call.
7830 *
7831 * @returns New @a off value.
7832 * @param pReNative The recompiler state.
7833 * @param off The code buffer position.
7834 * @param fHstRegsNotToSave Set of registers not to save & restore.
7835 */
7836DECL_HIDDEN_THROW(uint32_t)
7837iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7838{
7839 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7840 if (fHstRegs)
7841 {
7842 do
7843 {
7844 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7845 fHstRegs &= ~RT_BIT_32(idxHstReg);
7846
7847 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7848 {
7849 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7850 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7851 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7852 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7853 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7854 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7855 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7856 {
7857 case kIemNativeVarKind_Stack:
7858 {
7859 /* Temporarily spill the variable register. */
7860 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7861 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7862 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7863 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7864 continue;
7865 }
7866
7867 case kIemNativeVarKind_Immediate:
7868 case kIemNativeVarKind_VarRef:
7869 case kIemNativeVarKind_GstRegRef:
7870 /* It is weird to have any of these loaded at this point. */
7871 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7872 continue;
7873
7874 case kIemNativeVarKind_End:
7875 case kIemNativeVarKind_Invalid:
7876 break;
7877 }
7878 AssertFailed();
7879 }
7880 else
7881 {
7882 /*
7883 * Allocate a temporary stack slot and spill the register to it.
7884 */
7885 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7886 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7887 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7888 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7889 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7890 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7891 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7892 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7893 }
7894 } while (fHstRegs);
7895 }
7896#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7897 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7898 if (fHstRegs)
7899 {
7900 do
7901 {
7902 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7903 fHstRegs &= ~RT_BIT_32(idxHstReg);
7904
7905 /*
7906 * Guest registers are flushed to CPUMCTX at the moment, so they don't need a stack slot
7907 * allocated, which would be more difficult anyway as they span multiple stack slots and
7908 * come in different sizes (besides, we only have a limited number of slots at the moment).
7909 * Fixed temporary registers don't need saving.
7910 */
7911 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7912 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7913 continue;
7914
7915 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7916
7917 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7918 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7919 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7920 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7921 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7922 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7923 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7924 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7925 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7926 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7927 {
7928 case kIemNativeVarKind_Stack:
7929 {
7930 /* Temporarily spill the variable register. */
7931 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7932 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7933 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7934 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7935 if (cbVar == sizeof(RTUINT128U))
7936 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7937 else
7938 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7939 continue;
7940 }
7941
7942 case kIemNativeVarKind_Immediate:
7943 case kIemNativeVarKind_VarRef:
7944 case kIemNativeVarKind_GstRegRef:
7945 /* It is weird to have any of these loaded at this point. */
7946 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7947 continue;
7948
7949 case kIemNativeVarKind_End:
7950 case kIemNativeVarKind_Invalid:
7951 break;
7952 }
7953 AssertFailed();
7954 } while (fHstRegs);
7955 }
7956#endif
7957 return off;
7958}
7959
7960
7961/**
7962 * Emit code to restore volatile registers after a call to a helper.
7963 *
7964 * @returns New @a off value.
7965 * @param pReNative The recompiler state.
7966 * @param off The code buffer position.
7967 * @param fHstRegsNotToSave Set of registers not to save & restore.
7968 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7969 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7970 */
7971DECL_HIDDEN_THROW(uint32_t)
7972iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7973{
7974 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7975 if (fHstRegs)
7976 {
7977 do
7978 {
7979 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7980 fHstRegs &= ~RT_BIT_32(idxHstReg);
7981
7982 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7983 {
7984 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7985 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7986 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7987 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7988 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7989 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7990 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7991 {
7992 case kIemNativeVarKind_Stack:
7993 {
7994 /* Unspill the variable register. */
7995 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7996 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7997 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7998 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7999 continue;
8000 }
8001
8002 case kIemNativeVarKind_Immediate:
8003 case kIemNativeVarKind_VarRef:
8004 case kIemNativeVarKind_GstRegRef:
8005 /* It is weird to have any of these loaded at this point. */
8006 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8007 continue;
8008
8009 case kIemNativeVarKind_End:
8010 case kIemNativeVarKind_Invalid:
8011 break;
8012 }
8013 AssertFailed();
8014 }
8015 else
8016 {
8017 /*
8018 * Restore from temporary stack slot.
8019 */
8020 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8021 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8022 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8023 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8024
8025 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8026 }
8027 } while (fHstRegs);
8028 }
8029#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8030 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8031 if (fHstRegs)
8032 {
8033 do
8034 {
8035 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8036 fHstRegs &= ~RT_BIT_32(idxHstReg);
8037
8038 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8039 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8040 continue;
8041 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8042
8043 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8044 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8045 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8046 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8047 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8048 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8049 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8050 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8051 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8052 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8053 {
8054 case kIemNativeVarKind_Stack:
8055 {
8056 /* Unspill the variable register. */
8057 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8058 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8059 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8060 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8061
8062 if (cbVar == sizeof(RTUINT128U))
8063 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8064 else
8065 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8066 continue;
8067 }
8068
8069 case kIemNativeVarKind_Immediate:
8070 case kIemNativeVarKind_VarRef:
8071 case kIemNativeVarKind_GstRegRef:
8072 /* It is weird to have any of these loaded at this point. */
8073 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8074 continue;
8075
8076 case kIemNativeVarKind_End:
8077 case kIemNativeVarKind_Invalid:
8078 break;
8079 }
8080 AssertFailed();
8081 } while (fHstRegs);
8082 }
8083#endif
8084 return off;
8085}
8086
8087
8088/**
8089 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8090 *
8091 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8092 *
8093 * ASSUMES that @a idxVar is valid and unpacked.
8094 */
8095DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8096{
8097 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8098 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8099 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8100 {
8101 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8102 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8103 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8104 Assert(cSlots > 0);
8105 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8106 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8107 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8108 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8109 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8110 }
8111 else
8112 Assert(idxStackSlot == UINT8_MAX);
8113}
8114
8115
8116/**
8117 * Worker that frees a single variable.
8118 *
8119 * ASSUMES that @a idxVar is valid and unpacked.
8120 */
8121DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8122{
8123 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8124 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8125 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8126
8127 /* Free the host register first if any assigned. */
8128 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8129#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8130 if ( idxHstReg != UINT8_MAX
8131 && pReNative->Core.aVars[idxVar].fSimdReg)
8132 {
8133 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8134 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8135 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8136 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8137 }
8138 else
8139#endif
8140 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8141 {
8142 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8143 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8144 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8145 }
8146
8147 /* Free argument mapping. */
8148 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8149 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8150 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8151
8152 /* Free the stack slots. */
8153 iemNativeVarFreeStackSlots(pReNative, idxVar);
8154
8155 /* Free the actual variable. */
8156 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8157 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8158}
8159
8160
8161/**
8162 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8163 */
8164DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8165{
8166 while (bmVars != 0)
8167 {
8168 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8169 bmVars &= ~RT_BIT_32(idxVar);
8170
8171#if 1 /** @todo optimize by simplifying this later... */
8172 iemNativeVarFreeOneWorker(pReNative, idxVar);
8173#else
8174 /* Only need to free the host register, the rest is done as bulk updates below. */
8175 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8176 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8177 {
8178 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8179 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8180 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8181 }
8182#endif
8183 }
8184#if 0 /** @todo optimize by simplifying this later... */
8185 pReNative->Core.bmVars = 0;
8186 pReNative->Core.bmStack = 0;
8187 pReNative->Core.u64ArgVars = UINT64_MAX;
8188#endif
8189}
8190
8191
8192
8193/*********************************************************************************************************************************
8194* Emitters for IEM_MC_CALL_CIMPL_XXX *
8195*********************************************************************************************************************************/
8196
8197/**
8198 * Emits code to load a reference to the given guest register into @a idxGprDst.
8199 */
8200DECL_HIDDEN_THROW(uint32_t)
8201iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8202 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8203{
8204#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8205 /** @todo If we ever allow referencing the RIP register, we need to update the guest value here. */
8206#endif
8207
8208 /*
8209 * Get the offset relative to the CPUMCTX structure.
8210 */
8211 uint32_t offCpumCtx;
8212 switch (enmClass)
8213 {
8214 case kIemNativeGstRegRef_Gpr:
8215 Assert(idxRegInClass < 16);
8216 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8217 break;
8218
8219 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8220 Assert(idxRegInClass < 4);
8221 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8222 break;
8223
8224 case kIemNativeGstRegRef_EFlags:
8225 Assert(idxRegInClass == 0);
8226 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8227 break;
8228
8229 case kIemNativeGstRegRef_MxCsr:
8230 Assert(idxRegInClass == 0);
8231 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8232 break;
8233
8234 case kIemNativeGstRegRef_FpuReg:
8235 Assert(idxRegInClass < 8);
8236 AssertFailed(); /** @todo what kind of indexing? */
8237 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8238 break;
8239
8240 case kIemNativeGstRegRef_MReg:
8241 Assert(idxRegInClass < 8);
8242 AssertFailed(); /** @todo what kind of indexing? */
8243 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8244 break;
8245
8246 case kIemNativeGstRegRef_XReg:
8247 Assert(idxRegInClass < 16);
8248 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8249 break;
8250
8251 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8252 Assert(idxRegInClass == 0);
8253 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8254 break;
8255
8256 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8257 Assert(idxRegInClass == 0);
8258 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8259 break;
8260
8261 default:
8262 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8263 }
8264
8265 /*
8266 * Load the value into the destination register.
8267 */
8268#ifdef RT_ARCH_AMD64
8269 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8270
8271#elif defined(RT_ARCH_ARM64)
8272 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8273 Assert(offCpumCtx < 4096);
8274 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8275
8276#else
8277# error "Port me!"
8278#endif
8279
8280 return off;
8281}
8282
8283
8284/**
8285 * Common code for CIMPL and AIMPL calls.
8286 *
8287 * These are calls that use argument variables and such. They should not be
8288 * confused with internal calls required to implement an MC operation,
8289 * like a TLB load and similar.
8290 *
8291 * Upon return all that is left to do is to load any hidden arguments and
8292 * perform the call. All argument variables are freed.
8293 *
8294 * @returns New code buffer offset; throws VBox status code on error.
8295 * @param pReNative The native recompile state.
8296 * @param off The code buffer offset.
8297 * @param cArgs The total number of arguments (includes hidden
8298 * count).
8299 * @param cHiddenArgs The number of hidden arguments. The hidden
8300 * arguments must not have any variable declared for
8301 * them, whereas all the regular arguments must
8302 * (tstIEMCheckMc ensures this).
8303 */
8304DECL_HIDDEN_THROW(uint32_t)
8305iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8306{
8307#ifdef VBOX_STRICT
8308 /*
8309 * Assert sanity.
8310 */
8311 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8312 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8313 for (unsigned i = 0; i < cHiddenArgs; i++)
8314 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8315 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8316 {
8317 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8318 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8319 }
8320 iemNativeRegAssertSanity(pReNative);
8321#endif
8322
8323 /* We don't know what the called function makes use of, so flush any pending register writes. */
8324 off = iemNativeRegFlushPendingWrites(pReNative, off);
8325
8326 /*
8327 * Before we do anything else, go over variables that are referenced and
8328 * make sure they are not in a register.
8329 */
8330 uint32_t bmVars = pReNative->Core.bmVars;
8331 if (bmVars)
8332 {
8333 do
8334 {
8335 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8336 bmVars &= ~RT_BIT_32(idxVar);
8337
8338 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8339 {
8340 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8341#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8342 if ( idxRegOld != UINT8_MAX
8343 && pReNative->Core.aVars[idxVar].fSimdReg)
8344 {
8345 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8346 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8347
8348 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8349 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8350 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8351 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8352 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8353 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8354 else
8355 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8356
8357 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8358 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8359
8360 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8361 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8362 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8363 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8364 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8365 }
8366 else
8367#endif
8368 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8369 {
8370 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8371 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8372 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8373 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8374 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8375
8376 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8377 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8378 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8379 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8380 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8381 }
8382 }
8383 } while (bmVars != 0);
8384#if 0 //def VBOX_STRICT
8385 iemNativeRegAssertSanity(pReNative);
8386#endif
8387 }
8388
8389 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8390
8391 /*
8392 * First, go over the host registers that will be used for arguments and make
8393 * sure they either hold the desired argument or are free.
8394 */
8395 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8396 {
8397 for (uint32_t i = 0; i < cRegArgs; i++)
8398 {
8399 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8400 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8401 {
8402 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8403 {
8404 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8406 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8407 Assert(pVar->idxReg == idxArgReg);
8408 uint8_t const uArgNo = pVar->uArgNo;
8409 if (uArgNo == i)
8410 { /* perfect */ }
8411 /* The variable allocator logic should make sure this is impossible,
8412 except for when the return register is used as a parameter (ARM,
8413 but not x86). */
8414#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8415 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8416 {
8417# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8418# error "Implement this"
8419# endif
8420 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8421 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8422 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8423 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8424 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8425 }
8426#endif
8427 else
8428 {
8429 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8430
8431 if (pVar->enmKind == kIemNativeVarKind_Stack)
8432 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8433 else
8434 {
8435 /* just free it, can be reloaded if used again */
8436 pVar->idxReg = UINT8_MAX;
8437 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8438 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8439 }
8440 }
8441 }
8442 else
8443 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8444 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8445 }
8446 }
8447#if 0 //def VBOX_STRICT
8448 iemNativeRegAssertSanity(pReNative);
8449#endif
8450 }
8451
8452 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8453
8454#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8455 /*
8456 * If there are any stack arguments, make sure they are in their place as well.
8457 *
8458 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8459 * the caller) will be loading it later anyway and it must be free (see the first loop).
8460 */
8461 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8462 {
8463 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8464 {
8465 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8466 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8467 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8468 {
8469 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8470 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8471 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8472 pVar->idxReg = UINT8_MAX;
8473 }
8474 else
8475 {
8476 /* Use ARG0 as temp for stuff we need registers for. */
8477 switch (pVar->enmKind)
8478 {
8479 case kIemNativeVarKind_Stack:
8480 {
8481 uint8_t const idxStackSlot = pVar->idxStackSlot;
8482 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8483 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8484 iemNativeStackCalcBpDisp(idxStackSlot));
8485 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8486 continue;
8487 }
8488
8489 case kIemNativeVarKind_Immediate:
8490 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8491 continue;
8492
8493 case kIemNativeVarKind_VarRef:
8494 {
8495 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8496 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8497 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8498 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8499 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8500# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8501 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8502 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8503 if ( fSimdReg
8504 && idxRegOther != UINT8_MAX)
8505 {
8506 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8507 if (cbVar == sizeof(RTUINT128U))
8508 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8509 else
8510 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8511 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8512 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8513 }
8514 else
8515# endif
8516 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8517 {
8518 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8519 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8520 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8521 }
8522 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8523 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8524 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8525 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8526 continue;
8527 }
8528
8529 case kIemNativeVarKind_GstRegRef:
8530 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8531 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8532 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8533 continue;
8534
8535 case kIemNativeVarKind_Invalid:
8536 case kIemNativeVarKind_End:
8537 break;
8538 }
8539 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8540 }
8541 }
8542# if 0 //def VBOX_STRICT
8543 iemNativeRegAssertSanity(pReNative);
8544# endif
8545 }
8546#else
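    /* Without IEMNATIVE_FP_OFF_STACK_ARG0 there are no stack arguments to place;
       every argument must fit in a call register, as the assertion below verifies. */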
8547 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8548#endif
8549
8550 /*
8551 * Make sure the argument variables are loaded into their respective registers.
8552 *
8553 * We can optimize this by ASSUMING that any register allocations are for
8554 * registers that have already been loaded and are ready. The previous step
8555 * saw to that.
8556 */
8557 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8558 {
8559 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8560 {
8561 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8562 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8563 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8564 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8565 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8566 else
8567 {
8568 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8569 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8570 {
8571 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8572 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8573 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8574 | RT_BIT_32(idxArgReg);
8575 pVar->idxReg = idxArgReg;
8576 }
8577 else
8578 {
8579 /* Use ARG0 as temp for stuff we need registers for. */
8580 switch (pVar->enmKind)
8581 {
8582 case kIemNativeVarKind_Stack:
8583 {
8584 uint8_t const idxStackSlot = pVar->idxStackSlot;
8585 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8586 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8587 continue;
8588 }
8589
8590 case kIemNativeVarKind_Immediate:
8591 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8592 continue;
8593
8594 case kIemNativeVarKind_VarRef:
8595 {
8596 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8597 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8598 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8599 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8600 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8601 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8602#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8603 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8604 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8605 if ( fSimdReg
8606 && idxRegOther != UINT8_MAX)
8607 {
8608 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8609 if (cbVar == sizeof(RTUINT128U))
8610 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8611 else
8612 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8613 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8614 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8615 }
8616 else
8617#endif
8618 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8619 {
8620 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8621 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8622 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8623 }
8624 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8625 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8626 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8627 continue;
8628 }
8629
8630 case kIemNativeVarKind_GstRegRef:
8631 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8632 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8633 continue;
8634
8635 case kIemNativeVarKind_Invalid:
8636 case kIemNativeVarKind_End:
8637 break;
8638 }
8639 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8640 }
8641 }
8642 }
8643#if 0 //def VBOX_STRICT
8644 iemNativeRegAssertSanity(pReNative);
8645#endif
8646 }
8647#ifdef VBOX_STRICT
8648 else
8649 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8650 {
8651 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8652 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8653 }
8654#endif
8655
8656 /*
8657 * Free all argument variables (simplified).
8658 * Their lifetime always expires with the call they are for.
8659 */
8660 /** @todo Make the python script check that arguments aren't used after
8661 * IEM_MC_CALL_XXXX. */
8662 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8663 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8664 * an argument value. There is also some FPU stuff. */
8665 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8666 {
8667 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8668 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8669
8670 /* no need to free registers: */
8671 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8672 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8673 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8674 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8675 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8676 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8677
8678 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8679 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8680 iemNativeVarFreeStackSlots(pReNative, idxVar);
8681 }
8682 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8683
8684 /*
8685 * Flush volatile registers as we make the call.
8686 */
8687 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8688
8689 return off;
8690}
8691
8692
8693
8694/*********************************************************************************************************************************
8695* TLB Lookup. *
8696*********************************************************************************************************************************/
8697
8698/**
8699 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8700 */
8701DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8702{
8703 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8704 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8705 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
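    /* So the packed layout is: bits 0..7 = segment register index (UINT8_MAX for an
       already flat address, see GCPtrFlat below), bits 8..15 = access size in bytes,
       bits 16..31 = IEM_ACCESS_XXX flags. */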
8706 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8707
8708 /* Do the lookup manually. */
8709 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8710 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8711 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8712 if (RT_LIKELY(pTlbe->uTag == uTag))
8713 {
8714 /*
8715 * Check TLB page table level access flags.
8716 */
8717 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
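    /* (CPL + 1) & 4 is only non-zero for CPL 3, so the IEMTLBE_F_PT_NO_USER check
       below only applies to user-mode (ring-3) accesses. */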
8718 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8719 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8720 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8721 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8722 | IEMTLBE_F_PG_UNASSIGNED
8723 | IEMTLBE_F_PT_NO_ACCESSED
8724 | fNoWriteNoDirty | fNoUser);
8725 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8726 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8727 {
8728 /*
8729 * Return the address.
8730 */
8731 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8732 if ((uintptr_t)pbAddr == uResult)
8733 return;
8734 RT_NOREF(cbMem);
8735 AssertFailed();
8736 }
8737 else
8738 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8739 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8740 }
8741 else
8742 AssertFailed();
8743 RT_BREAKPOINT();
8744}
8745
8746/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8747
8748
8749
8750/*********************************************************************************************************************************
8751* Recompiler Core. *
8752*********************************************************************************************************************************/
8753
8754/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8755static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8756{
8757 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8758 pDis->cbCachedInstr += cbMaxRead;
8759 RT_NOREF(cbMinRead);
8760 return VERR_NO_DATA;
8761}
8762
8763
8764DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8765{
8766 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8767 {
8768#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8769 ENTRY(fLocalForcedActions),
8770 ENTRY(iem.s.rcPassUp),
8771 ENTRY(iem.s.fExec),
8772 ENTRY(iem.s.pbInstrBuf),
8773 ENTRY(iem.s.uInstrBufPc),
8774 ENTRY(iem.s.GCPhysInstrBuf),
8775 ENTRY(iem.s.cbInstrBufTotal),
8776 ENTRY(iem.s.idxTbCurInstr),
8777#ifdef VBOX_WITH_STATISTICS
8778 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8779 ENTRY(iem.s.StatNativeTlbHitsForStore),
8780 ENTRY(iem.s.StatNativeTlbHitsForStack),
8781 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8782 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8783 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8784 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8785 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8786#endif
8787 ENTRY(iem.s.DataTlb.aEntries),
8788 ENTRY(iem.s.DataTlb.uTlbRevision),
8789 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8790 ENTRY(iem.s.DataTlb.cTlbHits),
8791 ENTRY(iem.s.CodeTlb.aEntries),
8792 ENTRY(iem.s.CodeTlb.uTlbRevision),
8793 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8794 ENTRY(iem.s.CodeTlb.cTlbHits),
8795 ENTRY(pVMR3),
8796 ENTRY(cpum.GstCtx.rax),
8797 ENTRY(cpum.GstCtx.ah),
8798 ENTRY(cpum.GstCtx.rcx),
8799 ENTRY(cpum.GstCtx.ch),
8800 ENTRY(cpum.GstCtx.rdx),
8801 ENTRY(cpum.GstCtx.dh),
8802 ENTRY(cpum.GstCtx.rbx),
8803 ENTRY(cpum.GstCtx.bh),
8804 ENTRY(cpum.GstCtx.rsp),
8805 ENTRY(cpum.GstCtx.rbp),
8806 ENTRY(cpum.GstCtx.rsi),
8807 ENTRY(cpum.GstCtx.rdi),
8808 ENTRY(cpum.GstCtx.r8),
8809 ENTRY(cpum.GstCtx.r9),
8810 ENTRY(cpum.GstCtx.r10),
8811 ENTRY(cpum.GstCtx.r11),
8812 ENTRY(cpum.GstCtx.r12),
8813 ENTRY(cpum.GstCtx.r13),
8814 ENTRY(cpum.GstCtx.r14),
8815 ENTRY(cpum.GstCtx.r15),
8816 ENTRY(cpum.GstCtx.es.Sel),
8817 ENTRY(cpum.GstCtx.es.u64Base),
8818 ENTRY(cpum.GstCtx.es.u32Limit),
8819 ENTRY(cpum.GstCtx.es.Attr),
8820 ENTRY(cpum.GstCtx.cs.Sel),
8821 ENTRY(cpum.GstCtx.cs.u64Base),
8822 ENTRY(cpum.GstCtx.cs.u32Limit),
8823 ENTRY(cpum.GstCtx.cs.Attr),
8824 ENTRY(cpum.GstCtx.ss.Sel),
8825 ENTRY(cpum.GstCtx.ss.u64Base),
8826 ENTRY(cpum.GstCtx.ss.u32Limit),
8827 ENTRY(cpum.GstCtx.ss.Attr),
8828 ENTRY(cpum.GstCtx.ds.Sel),
8829 ENTRY(cpum.GstCtx.ds.u64Base),
8830 ENTRY(cpum.GstCtx.ds.u32Limit),
8831 ENTRY(cpum.GstCtx.ds.Attr),
8832 ENTRY(cpum.GstCtx.fs.Sel),
8833 ENTRY(cpum.GstCtx.fs.u64Base),
8834 ENTRY(cpum.GstCtx.fs.u32Limit),
8835 ENTRY(cpum.GstCtx.fs.Attr),
8836 ENTRY(cpum.GstCtx.gs.Sel),
8837 ENTRY(cpum.GstCtx.gs.u64Base),
8838 ENTRY(cpum.GstCtx.gs.u32Limit),
8839 ENTRY(cpum.GstCtx.gs.Attr),
8840 ENTRY(cpum.GstCtx.rip),
8841 ENTRY(cpum.GstCtx.eflags),
8842 ENTRY(cpum.GstCtx.uRipInhibitInt),
8843 ENTRY(cpum.GstCtx.cr0),
8844 ENTRY(cpum.GstCtx.cr4),
8845 ENTRY(cpum.GstCtx.aXcr[0]),
8846 ENTRY(cpum.GstCtx.aXcr[1]),
8847#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8848 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8849 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8850 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8851 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8852 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8853 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8854 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8855 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8856 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8857 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8858 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8859 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8860 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8861 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8862 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8863 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8864 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8865 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8866 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8867 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8868 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8869 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8870 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8871 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8872 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8873 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8874 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8875 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8876 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8877 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8878 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8879 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8880#endif
8881#undef ENTRY
8882 };
8883#ifdef VBOX_STRICT
8884 static bool s_fOrderChecked = false;
8885 if (!s_fOrderChecked)
8886 {
8887 s_fOrderChecked = true;
8888 uint32_t offPrev = s_aMembers[0].off;
8889 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8890 {
8891 Assert(s_aMembers[i].off > offPrev);
8892 offPrev = s_aMembers[i].off;
8893 }
8894 }
8895#endif
8896
8897 /*
8898 * Binary lookup.
8899 */
8900 unsigned iStart = 0;
8901 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8902 for (;;)
8903 {
8904 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8905 uint32_t const offCur = s_aMembers[iCur].off;
8906 if (off < offCur)
8907 {
8908 if (iCur != iStart)
8909 iEnd = iCur;
8910 else
8911 break;
8912 }
8913 else if (off > offCur)
8914 {
8915 if (iCur + 1 < iEnd)
8916 iStart = iCur + 1;
8917 else
8918 break;
8919 }
8920 else
8921 return s_aMembers[iCur].pszName;
8922 }
8923#ifdef VBOX_WITH_STATISTICS
8924 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8925 return "iem.s.acThreadedFuncStats[iFn]";
8926#endif
8927 return NULL;
8928}
8929
8930
8931DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8932{
8933 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8934#if defined(RT_ARCH_AMD64)
8935 static const char * const a_apszMarkers[] =
8936 {
8937 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8938 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8939 };
8940#endif
8941
8942 char szDisBuf[512];
8943 DISSTATE Dis;
8944 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8945 uint32_t const cNative = pTb->Native.cInstructions;
8946 uint32_t offNative = 0;
8947#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8948 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8949#endif
8950 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8951 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8952 : DISCPUMODE_64BIT;
8953#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8954 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8955#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8956 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8957#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8958# error "Port me"
8959#else
8960 csh hDisasm = ~(size_t)0;
8961# if defined(RT_ARCH_AMD64)
8962 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8963# elif defined(RT_ARCH_ARM64)
8964 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8965# else
8966# error "Port me"
8967# endif
8968 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8969
8970 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8971 //Assert(rcCs == CS_ERR_OK);
8972#endif
8973
8974 /*
8975 * Print TB info.
8976 */
8977 pHlp->pfnPrintf(pHlp,
8978 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
8979 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
8980 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
8981 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
8982#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8983 if (pDbgInfo && pDbgInfo->cEntries > 1)
8984 {
8985 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
8986
8987 /*
8988 * This disassembly is driven by the debug info which follows the native
8989 * code and indicates where the native code for the next guest instruction
8990 * starts, where labels are, and such things.
8991 */
8992 uint32_t idxThreadedCall = 0;
8993 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
8994 uint8_t idxRange = UINT8_MAX;
8995 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
8996 uint32_t offRange = 0;
8997 uint32_t offOpcodes = 0;
8998 uint32_t const cbOpcodes = pTb->cbOpcodes;
8999 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9000 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9001 uint32_t iDbgEntry = 1;
9002 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9003
9004 while (offNative < cNative)
9005 {
9006 /* If we're at or have passed the point where the next chunk of debug
9007 info starts, process it. */
9008 if (offDbgNativeNext <= offNative)
9009 {
9010 offDbgNativeNext = UINT32_MAX;
9011 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9012 {
9013 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9014 {
9015 case kIemTbDbgEntryType_GuestInstruction:
9016 {
9017 /* Did the exec flag change? */
9018 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9019 {
9020 pHlp->pfnPrintf(pHlp,
9021 " fExec change %#08x -> %#08x %s\n",
9022 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9023 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9024 szDisBuf, sizeof(szDisBuf)));
9025 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9026 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9027 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9028 : DISCPUMODE_64BIT;
9029 }
9030
9031 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9032 where the compilation was aborted before the opcode was recorded and the actual
9033 instruction was translated to a threaded call. This may happen when we run out
9034 of ranges, or when some complicated interrupts/FFs are found to be pending or
9035 similar. So, we just deal with it here rather than in the compiler code as it
9036 is a lot simpler to do here. */
9037 if ( idxRange == UINT8_MAX
9038 || idxRange >= cRanges
9039 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9040 {
9041 idxRange += 1;
9042 if (idxRange < cRanges)
9043 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9044 else
9045 continue;
9046 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9047 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9048 + (pTb->aRanges[idxRange].idxPhysPage == 0
9049 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9050 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9051 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9052 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9053 pTb->aRanges[idxRange].idxPhysPage);
9054 GCPhysPc += offRange;
9055 }
9056
9057 /* Disassemble the instruction. */
9058 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
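                        /* 15 is the architectural maximum x86 instruction length. */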
9059 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9060 uint32_t cbInstr = 1;
9061 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9062 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9063 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9064 if (RT_SUCCESS(rc))
9065 {
9066 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9067 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9068 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9069 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9070
9071 static unsigned const s_offMarker = 55;
9072 static char const s_szMarker[] = " ; <--- guest";
9073 if (cch < s_offMarker)
9074 {
9075 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9076 cch = s_offMarker;
9077 }
9078 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9079 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9080
9081 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9082 }
9083 else
9084 {
9085 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9086 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9087 cbInstr = 1;
9088 }
9089 GCPhysPc += cbInstr;
9090 offOpcodes += cbInstr;
9091 offRange += cbInstr;
9092 continue;
9093 }
9094
9095 case kIemTbDbgEntryType_ThreadedCall:
9096 pHlp->pfnPrintf(pHlp,
9097 " Call #%u to %s (%u args) - %s\n",
9098 idxThreadedCall,
9099 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9100 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9101 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9102 idxThreadedCall++;
9103 continue;
9104
9105 case kIemTbDbgEntryType_GuestRegShadowing:
9106 {
9107 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9108 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9109 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9110 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9111 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9112 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9113 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
9114 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9115 else
9116 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9117 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9118 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9119 continue;
9120 }
9121
9122#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9123 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9124 {
9125 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9126 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9127 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9128 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9129 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9130 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9131 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9132 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9133 else
9134 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9135 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9136 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9137 continue;
9138 }
9139#endif
9140
9141 case kIemTbDbgEntryType_Label:
9142 {
9143 const char *pszName = "what_the_fudge";
9144 const char *pszComment = "";
9145 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9146 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9147 {
9148 case kIemNativeLabelType_Return: pszName = "Return"; break;
9149 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9150 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9151 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9152 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9153 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9154 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9155 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9156 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9157 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9158 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9159 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9160 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9161 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9162 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9163 case kIemNativeLabelType_If:
9164 pszName = "If";
9165 fNumbered = true;
9166 break;
9167 case kIemNativeLabelType_Else:
9168 pszName = "Else";
9169 fNumbered = true;
9170 pszComment = " ; regs state restored pre-if-block";
9171 break;
9172 case kIemNativeLabelType_Endif:
9173 pszName = "Endif";
9174 fNumbered = true;
9175 break;
9176 case kIemNativeLabelType_CheckIrq:
9177 pszName = "CheckIrq_CheckVM";
9178 fNumbered = true;
9179 break;
9180 case kIemNativeLabelType_TlbLookup:
9181 pszName = "TlbLookup";
9182 fNumbered = true;
9183 break;
9184 case kIemNativeLabelType_TlbMiss:
9185 pszName = "TlbMiss";
9186 fNumbered = true;
9187 break;
9188 case kIemNativeLabelType_TlbDone:
9189 pszName = "TlbDone";
9190 fNumbered = true;
9191 break;
9192 case kIemNativeLabelType_Invalid:
9193 case kIemNativeLabelType_End:
9194 break;
9195 }
9196 if (fNumbered)
9197 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9198 else
9199 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9200 continue;
9201 }
9202
9203 case kIemTbDbgEntryType_NativeOffset:
9204 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9205 Assert(offDbgNativeNext > offNative);
9206 break;
9207
9208#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9209 case kIemTbDbgEntryType_DelayedPcUpdate:
9210 pHlp->pfnPrintf(pHlp,
9211 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9212 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9213 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9214 continue;
9215#endif
9216
9217 default:
9218 AssertFailed();
9219 }
9220 iDbgEntry++;
9221 break;
9222 }
9223 }
9224
9225 /*
9226 * Disassemble the next native instruction.
9227 */
9228 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9229# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9230 uint32_t cbInstr = sizeof(paNative[0]);
9231 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9232 if (RT_SUCCESS(rc))
9233 {
9234# if defined(RT_ARCH_AMD64)
9235 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9236 {
9237 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9238 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9239 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9240 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9241 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9242 uInfo & 0x8000 ? "recompiled" : "todo");
9243 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9244 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9245 else
9246 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9247 }
9248 else
9249# endif
9250 {
9251 const char *pszAnnotation = NULL;
9252# ifdef RT_ARCH_AMD64
9253 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9254 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9255 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9256 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9257 PCDISOPPARAM pMemOp;
9258 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9259 pMemOp = &Dis.Param1;
9260 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9261 pMemOp = &Dis.Param2;
9262 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9263 pMemOp = &Dis.Param3;
9264 else
9265 pMemOp = NULL;
9266 if ( pMemOp
9267 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9268 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9269 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9270 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9271
9272#elif defined(RT_ARCH_ARM64)
9273 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9274 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9275 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9276# else
9277# error "Port me"
9278# endif
9279 if (pszAnnotation)
9280 {
9281 static unsigned const s_offAnnotation = 55;
9282 size_t const cchAnnotation = strlen(pszAnnotation);
9283 size_t cchDis = strlen(szDisBuf);
9284 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9285 {
9286 if (cchDis < s_offAnnotation)
9287 {
9288 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9289 cchDis = s_offAnnotation;
9290 }
9291 szDisBuf[cchDis++] = ' ';
9292 szDisBuf[cchDis++] = ';';
9293 szDisBuf[cchDis++] = ' ';
9294 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9295 }
9296 }
9297 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9298 }
9299 }
9300 else
9301 {
9302# if defined(RT_ARCH_AMD64)
9303 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9304 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9305# elif defined(RT_ARCH_ARM64)
9306 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9307# else
9308# error "Port me"
9309# endif
9310 cbInstr = sizeof(paNative[0]);
9311 }
9312 offNative += cbInstr / sizeof(paNative[0]);
9313
9314# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9315 cs_insn *pInstr;
9316 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9317 (uintptr_t)pNativeCur, 1, &pInstr);
9318 if (cInstrs > 0)
9319 {
9320 Assert(cInstrs == 1);
9321 const char *pszAnnotation = NULL;
9322# if defined(RT_ARCH_ARM64)
9323 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9324 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9325 {
9326 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9327 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9328 char *psz = strchr(pInstr->op_str, '[');
9329 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9330 {
9331 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9332 int32_t off = -1;
9333 psz += 4;
9334 if (*psz == ']')
9335 off = 0;
9336 else if (*psz == ',')
9337 {
9338 psz = RTStrStripL(psz + 1);
9339 if (*psz == '#')
9340 off = RTStrToInt32(&psz[1]);
9341 /** @todo deal with index registers and LSL as well... */
9342 }
9343 if (off >= 0)
9344 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9345 }
9346 }
9347# endif
9348
9349 size_t const cchOp = strlen(pInstr->op_str);
9350# if defined(RT_ARCH_AMD64)
9351 if (pszAnnotation)
9352 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9353 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9354 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9355 else
9356 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9357 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9358
9359# else
9360 if (pszAnnotation)
9361 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9362 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9363 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9364 else
9365 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9366 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9367# endif
9368 offNative += pInstr->size / sizeof(*pNativeCur);
9369 cs_free(pInstr, cInstrs);
9370 }
9371 else
9372 {
9373# if defined(RT_ARCH_AMD64)
9374 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9375 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9376# else
9377 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9378# endif
9379 offNative++;
9380 }
9381# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9382 }
9383 }
9384 else
9385#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9386 {
9387 /*
9388 * No debug info, just disassemble the x86 code and then the native code.
9389 *
9390 * First the guest code:
9391 */
9392 for (unsigned i = 0; i < pTb->cRanges; i++)
9393 {
9394 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9395 + (pTb->aRanges[i].idxPhysPage == 0
9396 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9397 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9398 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9399 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9400 unsigned off = pTb->aRanges[i].offOpcodes;
9401 /** @todo this ain't working when crossing pages! */
9402 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9403 while (off < cbOpcodes)
9404 {
9405 uint32_t cbInstr = 1;
9406 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9407 &pTb->pabOpcodes[off], cbOpcodes - off,
9408 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9409 if (RT_SUCCESS(rc))
9410 {
9411 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9412 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9413 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9414 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9415 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9416 GCPhysPc += cbInstr;
9417 off += cbInstr;
9418 }
9419 else
9420 {
9421 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9422 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9423 break;
9424 }
9425 }
9426 }
9427
9428 /*
9429 * Then the native code:
9430 */
9431 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9432 while (offNative < cNative)
9433 {
9434 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9435# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9436 uint32_t cbInstr = sizeof(paNative[0]);
9437 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9438 if (RT_SUCCESS(rc))
9439 {
9440# if defined(RT_ARCH_AMD64)
9441 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9442 {
9443 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9444 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9445 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9446 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9447 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9448 uInfo & 0x8000 ? "recompiled" : "todo");
9449 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9450 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9451 else
9452 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9453 }
9454 else
9455# endif
9456 {
9457# ifdef RT_ARCH_AMD64
9458 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9459 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9460 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9461 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9462# elif defined(RT_ARCH_ARM64)
9463 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9464 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9465 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9466# else
9467# error "Port me"
9468# endif
9469 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9470 }
9471 }
9472 else
9473 {
9474# if defined(RT_ARCH_AMD64)
9475 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9476 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9477# else
9478 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9479# endif
9480 cbInstr = sizeof(paNative[0]);
9481 }
9482 offNative += cbInstr / sizeof(paNative[0]);
9483
9484# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9485 cs_insn *pInstr;
9486 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9487 (uintptr_t)pNativeCur, 1, &pInstr);
9488 if (cInstrs > 0)
9489 {
9490 Assert(cInstrs == 1);
9491# if defined(RT_ARCH_AMD64)
9492 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9493 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9494# else
9495 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9496 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9497# endif
9498 offNative += pInstr->size / sizeof(*pNativeCur);
9499 cs_free(pInstr, cInstrs);
9500 }
9501 else
9502 {
9503# if defined(RT_ARCH_AMD64)
9504 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9505 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9506# else
9507 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9508# endif
9509 offNative++;
9510 }
9511# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9512 }
9513 }
9514
9515#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9516 /* Cleanup. */
9517 cs_close(&hDisasm);
9518#endif
9519}
9520
9521
9522/**
9523 * Recompiles the given threaded TB into a native one.
9524 *
9525 * In case of failure the translation block will be returned as-is.
9526 *
9527 * @returns pTb.
9528 * @param pVCpu The cross context virtual CPU structure of the calling
9529 * thread.
9530 * @param pTb The threaded translation to recompile to native.
9531 */
9532DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9533{
9534 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9535
9536 /*
9537 * The first time thru, we allocate the recompiler state; the other times
9538 * we just need to reset it before using it again.
9539 */
9540 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9541 if (RT_LIKELY(pReNative))
9542 iemNativeReInit(pReNative, pTb);
9543 else
9544 {
9545 pReNative = iemNativeInit(pVCpu, pTb);
9546 AssertReturn(pReNative, pTb);
9547 }
9548
9549#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9550 /*
9551 * First do liveness analysis. This is done backwards.
9552 */
9553 {
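    /* Roughly: what a call needs from the guest state depends on what the calls
       after it read or clobber, so each entry is derived from the entry of the
       following call; hence the loop below feeds entry idxCall into idxCall - 1. */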
9554 uint32_t idxCall = pTb->Thrd.cCalls;
9555 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9556 { /* likely */ }
9557 else
9558 {
9559 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9560 while (idxCall > cAlloc)
9561 cAlloc *= 2;
9562 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9563 AssertReturn(pvNew, pTb);
9564 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9565 pReNative->cLivenessEntriesAlloc = cAlloc;
9566 }
9567 AssertReturn(idxCall > 0, pTb);
9568 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9569
9570 /* The initial (final) entry. */
9571 idxCall--;
9572 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9573
9574 /* Loop backwards thru the calls and fill in the other entries. */
9575 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9576 while (idxCall > 0)
9577 {
9578 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9579 if (pfnLiveness)
9580 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9581 else
9582 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9583 pCallEntry--;
9584 idxCall--;
9585 }
9586
9587# ifdef VBOX_WITH_STATISTICS
9588 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9589 to 'clobbered' rather than 'input'. */
9590 /** @todo */
9591# endif
9592 }
9593#endif
9594
9595 /*
9596 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9597 * for aborting if an error happens.
9598 */
9599 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9600#ifdef LOG_ENABLED
9601 uint32_t const cCallsOrg = cCallsLeft;
9602#endif
9603 uint32_t off = 0;
9604 int rc = VINF_SUCCESS;
9605 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9606 {
9607 /*
9608 * Emit prolog code (fixed).
9609 */
9610 off = iemNativeEmitProlog(pReNative, off);
9611
9612 /*
9613 * Convert the calls to native code.
9614 */
9615#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9616 int32_t iGstInstr = -1;
9617#endif
9618#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9619 uint32_t cThreadedCalls = 0;
9620 uint32_t cRecompiledCalls = 0;
9621#endif
9622#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9623 uint32_t idxCurCall = 0;
9624#endif
9625 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9626 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9627 while (cCallsLeft-- > 0)
9628 {
9629 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9630#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9631 pReNative->idxCurCall = idxCurCall;
9632#endif
9633
9634 /*
9635 * Debug info, assembly markup and statistics.
9636 */
9637#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9638 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9639 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9640#endif
9641#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9642 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9643 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9644 {
9645 if (iGstInstr < (int32_t)pTb->cInstructions)
9646 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9647 else
9648 Assert(iGstInstr == pTb->cInstructions);
9649 iGstInstr = pCallEntry->idxInstr;
9650 }
9651 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9652#endif
9653#if defined(VBOX_STRICT)
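            /* Marker encoding (decoded again in iemNativeDisassembleTb): low word is the
               call index with bit 15 set when a native recompile function exists, high
               word is the threaded function number. */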
9654 off = iemNativeEmitMarker(pReNative, off,
9655 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9656#endif
9657#if defined(VBOX_STRICT)
9658 iemNativeRegAssertSanity(pReNative);
9659#endif
9660#ifdef VBOX_WITH_STATISTICS
9661 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9662#endif
9663
9664 /*
9665 * Actual work.
9666 */
9667 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9668 pfnRecom ? "(recompiled)" : "(todo)"));
9669 if (pfnRecom) /** @todo stats on this. */
9670 {
9671 off = pfnRecom(pReNative, off, pCallEntry);
9672 STAM_REL_STATS({cRecompiledCalls++;});
9673 }
9674 else
9675 {
9676 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9677 STAM_REL_STATS({cThreadedCalls++;});
9678 }
9679 Assert(off <= pReNative->cInstrBufAlloc);
9680 Assert(pReNative->cCondDepth == 0);
9681
9682#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9683 if (LogIs2Enabled())
9684 {
9685 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9686# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9687 static const char s_achState[] = "CUXI";
9688# else
9689 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9690# endif
9691
9692 char szGpr[17];
9693 for (unsigned i = 0; i < 16; i++)
9694 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9695 szGpr[16] = '\0';
9696
9697 char szSegBase[X86_SREG_COUNT + 1];
9698 char szSegLimit[X86_SREG_COUNT + 1];
9699 char szSegAttrib[X86_SREG_COUNT + 1];
9700 char szSegSel[X86_SREG_COUNT + 1];
9701 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9702 {
9703 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9704 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9705 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9706 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9707 }
9708 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9709 = szSegSel[X86_SREG_COUNT] = '\0';
9710
9711 char szEFlags[8];
9712 for (unsigned i = 0; i < 7; i++)
9713 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9714 szEFlags[7] = '\0';
9715
9716 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9717 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9718 }
9719#endif
9720
9721 /*
9722 * Advance.
9723 */
9724 pCallEntry++;
9725#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9726 idxCurCall++;
9727#endif
9728 }
9729
9730 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9731 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9732 if (!cThreadedCalls)
9733 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9734
9735 /*
9736 * Emit the epilog code.
9737 */
9738 uint32_t idxReturnLabel;
9739 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9740
9741 /*
9742 * Generate special jump labels.
9743 */
9744 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9745 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9746 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9747 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9748
9749 /*
9750 * Generate simple TB tail labels that just call a helper with a pVCpu
9751 * arg and either return or longjmp/throw a non-zero status.
9752 *
9753 * The array entries must be ordered by enmLabel value so we can index
9754 * using fTailLabels bit numbers.
9755 */
9756 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9757 static struct
9758 {
9759 IEMNATIVELABELTYPE enmLabel;
9760 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9761 } const g_aSimpleTailLabels[] =
9762 {
9763 { kIemNativeLabelType_Invalid, NULL },
9764 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9765 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9766 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9767 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9768 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9769 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9770 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9771 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9772 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9773 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9774 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9775 };
9776 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9777 AssertCompile(kIemNativeLabelType_Invalid == 0);
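/* The mask below keeps bits 1 thru kIemNativeLabelType_LastSimple of bmLabelTypes:
   RT_BIT_64(LastSimple + 1) - 2 sets exactly those bits, deliberately leaving out
   bit 0 (kIemNativeLabelType_Invalid, asserted to be zero just above). */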
9778 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9779 if (fTailLabels)
9780 {
9781 do
9782 {
9783 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9784 fTailLabels &= ~RT_BIT_64(enmLabel);
9785 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9786
9787 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9788 Assert(idxLabel != UINT32_MAX);
9789 if (idxLabel != UINT32_MAX)
9790 {
9791 iemNativeLabelDefine(pReNative, idxLabel, off);
9792
9793 /* int pfnCallback(PVMCPUCC pVCpu) */
9794 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9795 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9796
9797 /* jump back to the return sequence. */
9798 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9799 }
9800
9801 } while (fTailLabels);
9802 }
9803 }
9804 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9805 {
9806 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9807 return pTb;
9808 }
9809 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9810 Assert(off <= pReNative->cInstrBufAlloc);
9811
9812 /*
9813 * Make sure all labels have been defined.
9814 */
9815 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9816#ifdef VBOX_STRICT
9817 uint32_t const cLabels = pReNative->cLabels;
9818 for (uint32_t i = 0; i < cLabels; i++)
9819 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9820#endif
9821
9822 /*
9823 * Allocate executable memory, copy over the code we've generated.
9824 */
9825 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9826 if (pTbAllocator->pDelayedFreeHead)
9827 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9828
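/* Note: off counts IEMNATIVEINSTR units, which presumably are bytes on AMD64/x86 and
   32-bit instruction words on ARM64, hence the sizeof() scaling of the allocation. */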
9829 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9830 AssertReturn(paFinalInstrBuf, pTb);
9831 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9832
9833 /*
9834 * Apply fixups.
9835 */
9836 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9837 uint32_t const cFixups = pReNative->cFixups;
9838 for (uint32_t i = 0; i < cFixups; i++)
9839 {
9840 Assert(paFixups[i].off < off);
9841 Assert(paFixups[i].idxLabel < cLabels);
9842 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9843 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9844 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9845 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9846 switch (paFixups[i].enmType)
9847 {
9848#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
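/* 32-bit signed displacement as used by x86/AMD64 near JMP/Jcc/CALL; offsets are in bytes
   here, and offAddend presumably lets the emitter account for the displacement being
   relative to the end of the instruction rather than to the fixup position (typically -4). */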
9849 case kIemNativeFixupType_Rel32:
9850 Assert(paFixups[i].off + 4 <= off);
9851 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9852 continue;
9853
9854#elif defined(RT_ARCH_ARM64)
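/* imm26 branch target as used by the A64 B and BL instructions: bits 0..25, counted
   in 4-byte instruction units. */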
9855 case kIemNativeFixupType_RelImm26At0:
9856 {
9857 Assert(paFixups[i].off < off);
9858 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9859 Assert(offDisp >= -262144 && offDisp < 262144);
9860 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9861 continue;
9862 }
9863
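/* imm19 target as used by A64 B.cond, CBZ/CBNZ and LDR (literal): bits 5..23, a +/-1 MiB
   range counted in 4-byte instruction units (hence the +/-262144 assertion). */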
9864 case kIemNativeFixupType_RelImm19At5:
9865 {
9866 Assert(paFixups[i].off < off);
9867 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9868 Assert(offDisp >= -262144 && offDisp < 262144);
9869 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9870 continue;
9871 }
9872
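/* imm14 target as used by A64 TBZ/TBNZ: bits 5..18, a +/-32 KiB range counted in
   4-byte instruction units (hence the +/-8192 assertion). */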
9873 case kIemNativeFixupType_RelImm14At5:
9874 {
9875 Assert(paFixups[i].off < off);
9876 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9877 Assert(offDisp >= -8192 && offDisp < 8192);
9878 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9879 continue;
9880 }
9881
9882#endif
9883 case kIemNativeFixupType_Invalid:
9884 case kIemNativeFixupType_End:
9885 break;
9886 }
9887 AssertFailed();
9888 }
9889
9890 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9891 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
9892
9893 /*
9894 * Convert the translation block.
9895 */
9896 RTMemFree(pTb->Thrd.paCalls);
9897 pTb->Native.paInstructions = paFinalInstrBuf;
9898 pTb->Native.cInstructions = off;
9899 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9900#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9901 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9902 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9903#endif
9904
9905 Assert(pTbAllocator->cThreadedTbs > 0);
9906 pTbAllocator->cThreadedTbs -= 1;
9907 pTbAllocator->cNativeTbs += 1;
9908 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9909
9910#ifdef LOG_ENABLED
9911 /*
9912 * Disassemble to the log if enabled.
9913 */
9914 if (LogIs3Enabled())
9915 {
9916 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9917 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9918# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9919 RTLogFlush(NULL);
9920# endif
9921 }
9922#endif
9923 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9924
9925 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9926 return pTb;
9927}
9928