VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 103949

Last change on this file since 103949 was 103949, checked in by vboxsync, 11 months ago

VMM/IEM: Implement native emitters for IEM_MC_STORE_MEM_U256_NO_AC()/IEM_MC_STORE_MEM_FLAT_U256_NO_AC(), bugref:10614

1/* $Id: IEMAllN8veRecompiler.cpp 103949 2024-03-20 11:32:44Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
133static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
134#endif
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
136DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
137 IEMNATIVEGSTREG enmGstReg, uint32_t off);
138DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
139
140
141/*********************************************************************************************************************************
142* Executable Memory Allocator *
143*********************************************************************************************************************************/
144/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
145 * Use an alternative chunk sub-allocator that does not store internal data
146 * in the chunk.
147 *
148 * Using the RTHeapSimple is not practical on newer darwin systems where
149 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
150 * memory. We would have to change the protection of the whole chunk for
151 * every call to RTHeapSimple, which would be rather expensive.
152 *
153 * This alternative implementation restricts page protection modifications
154 * to the pages backing the executable memory we just allocated.
155 */
156#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
157/** The chunk sub-allocation unit size in bytes. */
158#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
159/** The chunk sub-allocation unit size as a shift factor. */
160#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
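/* Illustrative note (not from the original source): requests are rounded up to
 * whole 128 byte units before the allocation bitmap is consulted, e.g. a 200
 * byte request occupies 2 units (256 bytes) and a 1000 byte request occupies
 * 8 units (1024 bytes); see the cReqUnits calculation in
 * iemExecMemAllocatorAllocInChunk below. */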
161
162#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
163# ifdef IEMNATIVE_USE_GDB_JIT
164# define IEMNATIVE_USE_GDB_JIT_ET_DYN
165
166/** GDB JIT: Code entry. */
167typedef struct GDBJITCODEENTRY
168{
169 struct GDBJITCODEENTRY *pNext;
170 struct GDBJITCODEENTRY *pPrev;
171 uint8_t *pbSymFile;
172 uint64_t cbSymFile;
173} GDBJITCODEENTRY;
174
175/** GDB JIT: Actions. */
176typedef enum GDBJITACTIONS : uint32_t
177{
178 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
179} GDBJITACTIONS;
180
181/** GDB JIT: Descriptor. */
182typedef struct GDBJITDESCRIPTOR
183{
184 uint32_t uVersion;
185 GDBJITACTIONS enmAction;
186 GDBJITCODEENTRY *pRelevant;
187 GDBJITCODEENTRY *pHead;
188 /** Our addition: */
189 GDBJITCODEENTRY *pTail;
190} GDBJITDESCRIPTOR;
191
192/** GDB JIT: Our simple symbol file data. */
193typedef struct GDBJITSYMFILE
194{
195 Elf64_Ehdr EHdr;
196# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Shdr aShdrs[5];
198# else
199 Elf64_Shdr aShdrs[7];
200 Elf64_Phdr aPhdrs[2];
201# endif
202 /** The dwarf ehframe data for the chunk. */
203 uint8_t abEhFrame[512];
204 char szzStrTab[128];
205 Elf64_Sym aSymbols[3];
206# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Sym aDynSyms[2];
208 Elf64_Dyn aDyn[6];
209# endif
210} GDBJITSYMFILE;
211
212extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
213extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
214
215/** Init once for g_IemNativeGdbJitLock. */
216static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
217/** Init once for the critical section. */
218static RTCRITSECT g_IemNativeGdbJitLock;
219
220/** GDB reads the info here. */
221GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
222
223/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
224DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
225{
226 ASMNopPause();
227}
228
229/** @callback_method_impl{FNRTONCE} */
230static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
231{
232 RT_NOREF(pvUser);
233 return RTCritSectInit(&g_IemNativeGdbJitLock);
234}
235
236
237# endif /* IEMNATIVE_USE_GDB_JIT */
238
239/**
240 * Per-chunk unwind info for non-windows hosts.
241 */
242typedef struct IEMEXECMEMCHUNKEHFRAME
243{
244# ifdef IEMNATIVE_USE_LIBUNWIND
245 /** The offset of the FDA into abEhFrame. */
246 uintptr_t offFda;
247# else
248 /** 'struct object' storage area. */
249 uint8_t abObject[1024];
250# endif
251# ifdef IEMNATIVE_USE_GDB_JIT
252# if 0
253 /** The GDB JIT 'symbol file' data. */
254 GDBJITSYMFILE GdbJitSymFile;
255# endif
256 /** The GDB JIT list entry. */
257 GDBJITCODEENTRY GdbJitEntry;
258# endif
259 /** The dwarf ehframe data for the chunk. */
260 uint8_t abEhFrame[512];
261} IEMEXECMEMCHUNKEHFRAME;
262/** Pointer to per-chunk info for non-windows hosts. */
263typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
264#endif
265
266
267/**
268 * A chunk of executable memory.
269 */
270typedef struct IEMEXECMEMCHUNK
271{
272#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
273 /** Number of free items in this chunk. */
274 uint32_t cFreeUnits;
275 /** Hint where to start searching for free space in the allocation bitmap. */
276 uint32_t idxFreeHint;
277#else
278 /** The heap handle. */
279 RTHEAPSIMPLE hHeap;
280#endif
281 /** Pointer to the chunk. */
282 void *pvChunk;
283#ifdef IN_RING3
284 /**
285 * Pointer to the unwind information.
286 *
287 * This is used during C++ throw and longjmp (windows and probably most other
288 * platforms). Some debuggers (windbg) make use of it as well.
289 *
290 * Windows: This is allocated from hHeap on windows because (at least for
291 * AMD64) the UNWIND_INFO structure address in the
292 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
293 *
294 * Others: Allocated from the regular heap to avoid unnecessary executable data
295 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
296 void *pvUnwindInfo;
297#elif defined(IN_RING0)
298 /** Allocation handle. */
299 RTR0MEMOBJ hMemObj;
300#endif
301} IEMEXECMEMCHUNK;
302/** Pointer to a memory chunk. */
303typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
304
305
306/**
307 * Executable memory allocator for the native recompiler.
308 */
309typedef struct IEMEXECMEMALLOCATOR
310{
311 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
312 uint32_t uMagic;
313
314 /** The chunk size. */
315 uint32_t cbChunk;
316 /** The maximum number of chunks. */
317 uint32_t cMaxChunks;
318 /** The current number of chunks. */
319 uint32_t cChunks;
320 /** Hint where to start looking for available memory. */
321 uint32_t idxChunkHint;
322 /** Statistics: Current number of allocations. */
323 uint32_t cAllocations;
324
325 /** The total amount of memory available. */
326 uint64_t cbTotal;
327 /** Total amount of free memory. */
328 uint64_t cbFree;
329 /** Total amount of memory allocated. */
330 uint64_t cbAllocated;
331
332#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
333 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
334 *
335 * Since the chunk size is a power of two and the minimum chunk size is a lot
336 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
337 * require a whole number of uint64_t elements in the allocation bitmap. So,
338 * for the sake of simplicity/laziness, they are allocated as one continuous
339 * chunk. */
340 uint64_t *pbmAlloc;
341 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
342 uint32_t cUnitsPerChunk;
343 /** Number of bitmap elements per chunk (for quickly locating the bitmap
344 * portion corresponding to a chunk). */
345 uint32_t cBitmapElementsPerChunk;
346#else
347 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
348 * @{ */
349 /** The size of the heap internal block header. This is used to adjust the
350 * request memory size to make sure there is exactly enough room for a header at
351 * the end of the blocks we allocate before the next 64 byte alignment line. */
352 uint32_t cbHeapBlockHdr;
353 /** The size of the initial heap allocation required to make sure the first
354 * allocation is correctly aligned. */
355 uint32_t cbHeapAlignTweak;
356 /** The alignment tweak allocation address. */
357 void *pvAlignTweak;
358 /** @} */
359#endif
360
361#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
362 /** Pointer to the array of unwind info running parallel to aChunks (same
363 * allocation as this structure, located after the bitmaps).
364 * (For Windows, the structures must reside in 32-bit RVA distance to the
365 * actual chunk, so they are allocated off the chunk.) */
366 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
367#endif
368
369 /** The allocation chunks. */
370 RT_FLEXIBLE_ARRAY_EXTENSION
371 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
372} IEMEXECMEMALLOCATOR;
373/** Pointer to an executable memory allocator. */
374typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
375
376/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
377#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
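/* Illustrative sizing note (not from the original source): with the default
 * 64 MiB chunk (see iemExecMemAllocatorInit) and the 128 byte sub-allocation
 * unit, cUnitsPerChunk is 64 MiB / 128 = 524288, so the per-chunk allocation
 * bitmap needs 524288 / 64 = 8192 uint64_t elements (cBitmapElementsPerChunk). */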
378
379
380static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
381
382
383/**
384 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
385 * the heap statistics.
386 */
387static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
388 uint32_t cbReq, uint32_t idxChunk)
389{
390 pExecMemAllocator->cAllocations += 1;
391 pExecMemAllocator->cbAllocated += cbReq;
392#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
393 pExecMemAllocator->cbFree -= cbReq;
394#else
395 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
396#endif
397 pExecMemAllocator->idxChunkHint = idxChunk;
398
399#ifdef RT_OS_DARWIN
400 /*
401 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
402 * on darwin. So, we mark the pages returned as read+write after alloc and
403 * expect the caller to call iemExecMemAllocatorReadyForUse when done
404 * writing to the allocation.
405 *
406 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
407 * for details.
408 */
409 /** @todo detect if this is necessary... it wasn't required on 10.15 or
410 * whatever older version it was. */
411 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
412 AssertRC(rc);
413#endif
414
415 return pvRet;
416}
417
418
419#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
420static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
421 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
422{
423 /*
424 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
425 */
426 Assert(!(cToScan & 63));
427 Assert(!(idxFirst & 63));
428 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
429 pbmAlloc += idxFirst / 64;
430
431 /*
432 * Scan the bitmap for cReqUnits of consecutive clear bits
433 */
434 /** @todo This can probably be done more efficiently for non-x86 systems. */
435 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
436 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
437 {
438 uint32_t idxAddBit = 1;
439 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
440 idxAddBit++;
441 if (idxAddBit >= cReqUnits)
442 {
443 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
444
445 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
446 pChunk->cFreeUnits -= cReqUnits;
447 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
448
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451
452 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
453 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
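/* Illustrative walk-through (not from the original source): with cReqUnits = 3
 * and the low bitmap bits 0011 0011 (units 0, 1, 4 and 5 taken), the function
 * above first finds clear bit 2 via ASMBitFirstClear, stops at the set bit 4,
 * resumes via ASMBitNextClear at bit 6, finds units 6..8 free, marks them with
 * ASMBitSetRange and returns
 * pvChunk + ((idxFirst + 6) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT). */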
461
462
463static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
464{
465#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
466 /*
467 * Figure out how much to allocate.
468 */
469 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
470 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
471 {
472 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
473 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
474 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
475 {
476 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
477 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
478 if (pvRet)
479 return pvRet;
480 }
481 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
482 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
483 cReqUnits, idxChunk);
484 }
485#else
486 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
487 if (pvRet)
488 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
489#endif
490 return NULL;
491
492}
493
494
495/**
496 * Allocates @a cbReq bytes of executable memory.
497 *
498 * @returns Pointer to the memory, NULL if out of memory or other problem
499 * encountered.
500 * @param pVCpu The cross context virtual CPU structure of the calling
501 * thread.
502 * @param cbReq How many bytes are required.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 /*
514 * Adjust the request size so it'll fit the allocator alignment/whatnot.
515 *
516 * For the RTHeapSimple allocator this means to follow the logic described
517 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
518 * existing chunks if we think we've got sufficient free memory around.
519 *
520 * While for the alternative one we just align it up to a whole unit size.
521 */
522#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
523 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
524#else
525 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
526#endif
527 if (cbReq <= pExecMemAllocator->cbFree)
528 {
529 uint32_t const cChunks = pExecMemAllocator->cChunks;
530 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
531 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
532 {
533 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
534 if (pvRet)
535 return pvRet;
536 }
537 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
538 {
539 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
540 if (pvRet)
541 return pvRet;
542 }
543 }
544
545 /*
546 * Can we grow it with another chunk?
547 */
548 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
549 {
550 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
551 AssertLogRelRCReturn(rc, NULL);
552
553 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
554 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
555 if (pvRet)
556 return pvRet;
557 AssertFailed();
558 }
559
560 /*
561 * Try prune native TBs once.
562 */
563 if (iIteration == 0)
564 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
565 else
566 {
567 /** @todo stats... */
568 return NULL;
569 }
570 }
571
572}
573
574
575/** This is a hook that we may need later for changing memory protection back
576 * to readonly+exec */
577static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
578{
579#ifdef RT_OS_DARWIN
580 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
581 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
582 AssertRC(rc); RT_NOREF(pVCpu);
583
584 /*
585 * Flush the instruction cache:
586 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
587 */
588 /* sys_dcache_flush(pv, cb); - not necessary */
589 sys_icache_invalidate(pv, cb);
590#else
591 RT_NOREF(pVCpu, pv, cb);
592#endif
593}
594
595
596/**
597 * Frees executable memory.
598 */
599void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
600{
601 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
602 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
603 Assert(pv);
604#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
605 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
606#else
607 Assert(!((uintptr_t)pv & 63));
608#endif
609
610 /* Align the size as we did when allocating the block. */
611#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
612 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
613#else
614 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
615#endif
616
617 /* Free it / assert sanity. */
618#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
619 uint32_t const cChunks = pExecMemAllocator->cChunks;
620 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
621 bool fFound = false;
622 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
623 {
624 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
625 fFound = offChunk < cbChunk;
626 if (fFound)
627 {
628#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
629 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
630 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
631
632 /* Check that it's valid and free it. */
633 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
634 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
635 for (uint32_t i = 1; i < cReqUnits; i++)
636 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
637 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
638
639 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
640 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
641
642 /* Update the stats. */
643 pExecMemAllocator->cbAllocated -= cb;
644 pExecMemAllocator->cbFree += cb;
645 pExecMemAllocator->cAllocations -= 1;
646 return;
647#else
648 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
649 break;
650#endif
651 }
652 }
653# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
654 AssertFailed();
655# else
656 Assert(fFound);
657# endif
658#endif
659
660#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
661 /* Update stats while cb is freshly calculated.*/
662 pExecMemAllocator->cbAllocated -= cb;
663 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
664 pExecMemAllocator->cAllocations -= 1;
665
666 /* Free it. */
667 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
668#endif
669}
670
671
672
673#ifdef IN_RING3
674# ifdef RT_OS_WINDOWS
675
676/**
677 * Initializes the unwind info structures for windows hosts.
678 */
679static int
680iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
681 void *pvChunk, uint32_t idxChunk)
682{
683 RT_NOREF(pVCpu);
684
685 /*
686 * The AMD64 unwind opcodes.
687 *
688 * This is a program that starts with RSP after a RET instruction that
689 * ends up in recompiled code, and the operations we describe here will
690 * restore all non-volatile registers and bring RSP back to where our
691 * RET address is. This means it's reverse order from what happens in
692 * the prologue.
693 *
694 * Note! Using a frame register approach here both because we have one
695 * and mainly because the UWOP_ALLOC_LARGE argument values
696 * would be a pain to write initializers for. On the positive
697 * side, we're impervious to changes in the stack variable
698 * area and can deal with dynamic stack allocations if necessary.
699 */
700 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
701 {
702 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
703 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
704 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
705 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
706 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
707 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
708 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
709 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
710 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
711 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
712 };
713 union
714 {
715 IMAGE_UNWIND_INFO Info;
716 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
717 } s_UnwindInfo =
718 {
719 {
720 /* .Version = */ 1,
721 /* .Flags = */ 0,
722 /* .SizeOfProlog = */ 16, /* whatever */
723 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
724 /* .FrameRegister = */ X86_GREG_xBP,
725 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
726 }
727 };
728 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
729 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
730
731 /*
732 * Calc how much space we need and allocate it off the exec heap.
733 */
734 unsigned const cFunctionEntries = 1;
735 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
736 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
737# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
738 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
739 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
740 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
741# else
742 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
743 - pExecMemAllocator->cbHeapBlockHdr;
744 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
745 32 /*cbAlignment*/);
746# endif
747 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
748 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
749
750 /*
751 * Initialize the structures.
752 */
753 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
754
755 paFunctions[0].BeginAddress = 0;
756 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
757 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
758
759 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
760 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
761
762 /*
763 * Register it.
764 */
765 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
766 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since its within the chunk itself. */
767
768 return VINF_SUCCESS;
769}
770
771
772# else /* !RT_OS_WINDOWS */
773
774/**
775 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
776 */
777DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
778{
779 if (iValue >= 64)
780 {
781 Assert(iValue < 0x2000);
782 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
783 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
784 }
785 else if (iValue >= 0)
786 *Ptr.pb++ = (uint8_t)iValue;
787 else if (iValue > -64)
788 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
789 else
790 {
791 Assert(iValue > -0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
794 }
795 return Ptr;
796}
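/* Worked example (illustrative, not from the original source): the data
 * alignment factor -8 emitted for the CIE below encodes as the single byte
 * 0x78 ((-8 & 0x3f) | 0x40), while a value such as 300 takes the two-byte
 * form 0xAC 0x02. */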
797
798
799/**
800 * Emits an ULEB128 encoded value (up to 64-bit wide).
801 */
802DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
803{
804 while (uValue >= 0x80)
805 {
806 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
807 uValue >>= 7;
808 }
809 *Ptr.pb++ = (uint8_t)uValue;
810 return Ptr;
811}
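/* Worked example (illustrative, not from the original source): values below
 * 0x80 encode as themselves in a single byte, while e.g. 300 encodes as
 * 0xAC 0x02 (seven low bits first, high bit set on all but the last byte). */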
812
813
814/**
815 * Emits a CFA rule as register @a uReg + offset @a off.
816 */
817DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
818{
819 *Ptr.pb++ = DW_CFA_def_cfa;
820 Ptr = iemDwarfPutUleb128(Ptr, uReg);
821 Ptr = iemDwarfPutUleb128(Ptr, off);
822 return Ptr;
823}
824
825
826/**
827 * Emits a register (@a uReg) save location:
828 * CFA + @a off * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
831{
832 if (uReg < 0x40)
833 *Ptr.pb++ = DW_CFA_offset | uReg;
834 else
835 {
836 *Ptr.pb++ = DW_CFA_offset_extended;
837 Ptr = iemDwarfPutUleb128(Ptr, uReg);
838 }
839 Ptr = iemDwarfPutUleb128(Ptr, off);
840 return Ptr;
841}
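/* Illustrative note (not from the original source): combined with the CIE's
 * data alignment factor of -8 below, iemDwarfPutCfaOffset(Ptr, uReg, 2) records
 * that register uReg was saved at CFA + 2 * -8 = CFA - 16, which is what the
 * "[CFA + n*-8]" comments on the initial instructions refer to. */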
842
843
844# if 0 /* unused */
845/**
846 * Emits a register (@a uReg) save location, using signed offset:
847 * CFA + @a offSigned * data_alignment_factor
848 */
849DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
850{
851 *Ptr.pb++ = DW_CFA_offset_extended_sf;
852 Ptr = iemDwarfPutUleb128(Ptr, uReg);
853 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
854 return Ptr;
855}
856# endif
857
858
859/**
860 * Initializes the unwind info section for non-windows hosts.
861 */
862static int
863iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
864 void *pvChunk, uint32_t idxChunk)
865{
866 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
867 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
868
869 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
870
871 /*
872 * Generate the CIE first.
873 */
874# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
875 uint8_t const iDwarfVer = 3;
876# else
877 uint8_t const iDwarfVer = 4;
878# endif
879 RTPTRUNION const PtrCie = Ptr;
880 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
881 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
882 *Ptr.pb++ = iDwarfVer; /* DWARF version */
883 *Ptr.pb++ = 0; /* Augmentation. */
884 if (iDwarfVer >= 4)
885 {
886 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
887 *Ptr.pb++ = 0; /* Segment selector size. */
888 }
889# ifdef RT_ARCH_AMD64
890 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
891# else
892 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
893# endif
894 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
895# ifdef RT_ARCH_AMD64
896 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
897# elif defined(RT_ARCH_ARM64)
898 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
899# else
900# error "port me"
901# endif
902 /* Initial instructions: */
903# ifdef RT_ARCH_AMD64
904 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
911 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
912# elif defined(RT_ARCH_ARM64)
913# if 1
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
915# else
916 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
917# endif
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
922 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
923 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
924 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
925 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
926 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
927 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
930 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
931 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
932# else
933# error "port me"
934# endif
935 while ((Ptr.u - PtrCie.u) & 3)
936 *Ptr.pb++ = DW_CFA_nop;
937 /* Finalize the CIE size. */
938 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
939
940 /*
941 * Generate an FDE for the whole chunk area.
942 */
943# ifdef IEMNATIVE_USE_LIBUNWIND
944 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
945# endif
946 RTPTRUNION const PtrFde = Ptr;
947 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
948 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
949 Ptr.pu32++;
950 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
951 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
952# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
953 *Ptr.pb++ = DW_CFA_nop;
954# endif
955 while ((Ptr.u - PtrFde.u) & 3)
956 *Ptr.pb++ = DW_CFA_nop;
957 /* Finalize the FDE size. */
958 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
959
960 /* Terminator entry. */
961 *Ptr.pu32++ = 0;
962 *Ptr.pu32++ = 0; /* just to be sure... */
963 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
964
965 /*
966 * Register it.
967 */
968# ifdef IEMNATIVE_USE_LIBUNWIND
969 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
970# else
971 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
972 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
973# endif
974
975# ifdef IEMNATIVE_USE_GDB_JIT
976 /*
977 * Now for telling GDB about this (experimental).
978 *
979 * This seems to work best with ET_DYN.
980 */
981 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
982# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
983 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
984 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
985# else
986 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
987 - pExecMemAllocator->cbHeapBlockHdr;
988 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
989# endif
990 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
991 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
992
993 RT_ZERO(*pSymFile);
994
995 /*
996 * The ELF header:
997 */
998 pSymFile->EHdr.e_ident[0] = ELFMAG0;
999 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1000 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1001 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1002 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1003 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1004 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1005 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1006# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1007 pSymFile->EHdr.e_type = ET_DYN;
1008# else
1009 pSymFile->EHdr.e_type = ET_REL;
1010# endif
1011# ifdef RT_ARCH_AMD64
1012 pSymFile->EHdr.e_machine = EM_AMD64;
1013# elif defined(RT_ARCH_ARM64)
1014 pSymFile->EHdr.e_machine = EM_AARCH64;
1015# else
1016# error "port me"
1017# endif
1018 pSymFile->EHdr.e_version = 1; /*?*/
1019 pSymFile->EHdr.e_entry = 0;
1020# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1021 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1022# else
1023 pSymFile->EHdr.e_phoff = 0;
1024# endif
1025 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1026 pSymFile->EHdr.e_flags = 0;
1027 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1028# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1029 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1030 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1031# else
1032 pSymFile->EHdr.e_phentsize = 0;
1033 pSymFile->EHdr.e_phnum = 0;
1034# endif
1035 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1036 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1037 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1038
1039 uint32_t offStrTab = 0;
1040#define APPEND_STR(a_szStr) do { \
1041 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1042 offStrTab += sizeof(a_szStr); \
1043 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1044 } while (0)
1045#define APPEND_STR_FMT(a_szStr, ...) do { \
1046 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1047 offStrTab++; \
1048 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1049 } while (0)
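/* Illustrative note (not from the original source): the two macros above lay
 * out szzStrTab as consecutive NUL terminated strings, e.g. "", ".eh_frame",
 * ".shstrtab", ".symtab" (plus ".dynsym" and ".dynamic" for ET_DYN) and
 * ".text", with each sh_name/st_name/DT_SONAME value holding the byte offset
 * of its string within the table. */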
1050
1051 /*
1052 * Section headers.
1053 */
1054 /* Section header #0: NULL */
1055 unsigned i = 0;
1056 APPEND_STR("");
1057 RT_ZERO(pSymFile->aShdrs[i]);
1058 i++;
1059
1060 /* Section header: .eh_frame */
1061 pSymFile->aShdrs[i].sh_name = offStrTab;
1062 APPEND_STR(".eh_frame");
1063 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1064 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1065# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1066 pSymFile->aShdrs[i].sh_offset
1067 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1068# else
1069 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1070 pSymFile->aShdrs[i].sh_offset = 0;
1071# endif
1072
1073 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1074 pSymFile->aShdrs[i].sh_link = 0;
1075 pSymFile->aShdrs[i].sh_info = 0;
1076 pSymFile->aShdrs[i].sh_addralign = 1;
1077 pSymFile->aShdrs[i].sh_entsize = 0;
1078 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1079 i++;
1080
1081 /* Section header: .shstrtab */
1082 unsigned const iShStrTab = i;
1083 pSymFile->EHdr.e_shstrndx = iShStrTab;
1084 pSymFile->aShdrs[i].sh_name = offStrTab;
1085 APPEND_STR(".shstrtab");
1086 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1087 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1088# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1089 pSymFile->aShdrs[i].sh_offset
1090 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1091# else
1092 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1093 pSymFile->aShdrs[i].sh_offset = 0;
1094# endif
1095 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1096 pSymFile->aShdrs[i].sh_link = 0;
1097 pSymFile->aShdrs[i].sh_info = 0;
1098 pSymFile->aShdrs[i].sh_addralign = 1;
1099 pSymFile->aShdrs[i].sh_entsize = 0;
1100 i++;
1101
1102 /* Section header: .symbols */
1103 pSymFile->aShdrs[i].sh_name = offStrTab;
1104 APPEND_STR(".symtab");
1105 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1106 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1107 pSymFile->aShdrs[i].sh_offset
1108 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1109 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1110 pSymFile->aShdrs[i].sh_link = iShStrTab;
1111 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1112 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1113 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1114 i++;
1115
1116# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1117 /* Section header: .dynsym */
1118 pSymFile->aShdrs[i].sh_name = offStrTab;
1119 APPEND_STR(".dynsym");
1120 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1121 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1122 pSymFile->aShdrs[i].sh_offset
1123 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1124 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1125 pSymFile->aShdrs[i].sh_link = iShStrTab;
1126 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1127 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1128 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1129 i++;
1130# endif
1131
1132# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1133 /* Section header: .dynamic */
1134 pSymFile->aShdrs[i].sh_name = offStrTab;
1135 APPEND_STR(".dynamic");
1136 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1137 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1138 pSymFile->aShdrs[i].sh_offset
1139 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1140 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1141 pSymFile->aShdrs[i].sh_link = iShStrTab;
1142 pSymFile->aShdrs[i].sh_info = 0;
1143 pSymFile->aShdrs[i].sh_addralign = 1;
1144 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1145 i++;
1146# endif
1147
1148 /* Section header: .text */
1149 unsigned const iShText = i;
1150 pSymFile->aShdrs[i].sh_name = offStrTab;
1151 APPEND_STR(".text");
1152 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1153 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1154# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1155 pSymFile->aShdrs[i].sh_offset
1156 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1157# else
1158 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1159 pSymFile->aShdrs[i].sh_offset = 0;
1160# endif
1161 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1162 pSymFile->aShdrs[i].sh_link = 0;
1163 pSymFile->aShdrs[i].sh_info = 0;
1164 pSymFile->aShdrs[i].sh_addralign = 1;
1165 pSymFile->aShdrs[i].sh_entsize = 0;
1166 i++;
1167
1168 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1169
1170# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1171 /*
1172 * The program headers:
1173 */
1174 /* Everything in a single LOAD segment: */
1175 i = 0;
1176 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1177 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1178 pSymFile->aPhdrs[i].p_offset
1179 = pSymFile->aPhdrs[i].p_vaddr
1180 = pSymFile->aPhdrs[i].p_paddr = 0;
1181 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1182 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1183 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1184 i++;
1185 /* The .dynamic segment. */
1186 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1187 pSymFile->aPhdrs[i].p_flags = PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1193 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1194 i++;
1195
1196 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1197
1198 /*
1199 * The dynamic section:
1200 */
1201 i = 0;
1202 pSymFile->aDyn[i].d_tag = DT_SONAME;
1203 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1204 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1205 i++;
1206 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1207 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1208 i++;
1209 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1210 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1211 i++;
1212 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1213 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1214 i++;
1215 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1216 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1217 i++;
1218 pSymFile->aDyn[i].d_tag = DT_NULL;
1219 i++;
1220 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1221# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1222
1223 /*
1224 * Symbol tables:
1225 */
1226 /** @todo gdb doesn't seem to really like this ... */
1227 i = 0;
1228 pSymFile->aSymbols[i].st_name = 0;
1229 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1230 pSymFile->aSymbols[i].st_value = 0;
1231 pSymFile->aSymbols[i].st_size = 0;
1232 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1233 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1234# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1235 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1236# endif
1237 i++;
1238
1239 pSymFile->aSymbols[i].st_name = 0;
1240 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1241 pSymFile->aSymbols[i].st_value = 0;
1242 pSymFile->aSymbols[i].st_size = 0;
1243 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1244 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1245 i++;
1246
1247 pSymFile->aSymbols[i].st_name = offStrTab;
1248 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1249# if 0
1250 pSymFile->aSymbols[i].st_shndx = iShText;
1251 pSymFile->aSymbols[i].st_value = 0;
1252# else
1253 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1254 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1255# endif
1256 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1257 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1258 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1259# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1260 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1261 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1262# endif
1263 i++;
1264
1265 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1266 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1267
1268 /*
1269 * The GDB JIT entry and informing GDB.
1270 */
1271 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1272# if 1
1273 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1274# else
1275 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1276# endif
1277
1278 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1279 RTCritSectEnter(&g_IemNativeGdbJitLock);
1280 pEhFrame->GdbJitEntry.pNext = NULL;
1281 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1282 if (__jit_debug_descriptor.pTail)
1283 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1284 else
1285 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1286 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1287 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1288
1289 /* Notify GDB: */
1290 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1291 __jit_debug_register_code();
1292 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1293 RTCritSectLeave(&g_IemNativeGdbJitLock);
1294
1295# else /* !IEMNATIVE_USE_GDB_JIT */
1296 RT_NOREF(pVCpu);
1297# endif /* !IEMNATIVE_USE_GDB_JIT */
1298
1299 return VINF_SUCCESS;
1300}
1301
1302# endif /* !RT_OS_WINDOWS */
1303#endif /* IN_RING3 */
1304
1305
1306/**
1307 * Adds another chunk to the executable memory allocator.
1308 *
1309 * This is used by the init code for the initial allocation and later by the
1310 * regular allocator function when it's out of memory.
1311 */
1312static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1313{
1314 /* Check that we have room for growth. */
1315 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1316 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1317
1318 /* Allocate a chunk. */
1319#ifdef RT_OS_DARWIN
1320 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1321#else
1322 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1323#endif
1324 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1325
1326#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1327 int rc = VINF_SUCCESS;
1328#else
1329 /* Initialize the heap for the chunk. */
1330 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1331 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1332 AssertRC(rc);
1333 if (RT_SUCCESS(rc))
1334 {
1335 /*
1336 * We want the memory to be aligned on 64 byte, so the first time thru
1337 * here we do some exploratory allocations to see how we can achieve this.
1338 * On subsequent runs we only make an initial adjustment allocation, if
1339 * necessary.
1340 *
1341 * Since we own the heap implementation, we know that the internal block
1342 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1343 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1344 * to the size, align up by 64 bytes, and subtract 32 bytes.
1345 *
1346 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1347 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1348 * allocation to force subsequent allocations to return 64 byte aligned
1349 * user areas.
1350 */
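        /* Worked example (illustrative, not from the original source): with the
         * 32 byte block header, a 256 byte request is adjusted to
         * RT_ALIGN_32(256 + 32, 64) - 32 = 288 bytes, so the following block's
         * header ends exactly on the next 64 byte boundary and its user area
         * starts 64 byte aligned. */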
1351 if (!pExecMemAllocator->cbHeapBlockHdr)
1352 {
1353 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1354 pExecMemAllocator->cbHeapAlignTweak = 64;
1355 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1356 32 /*cbAlignment*/);
1357 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1358
1359 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1360 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1361 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1362 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1363 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1364
1365 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1366 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1367 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1368 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1369 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1370
1371 RTHeapSimpleFree(hHeap, pvTest2);
1372 RTHeapSimpleFree(hHeap, pvTest1);
1373 }
1374 else
1375 {
1376 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1377 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1378 }
1379 if (RT_SUCCESS(rc))
1380#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1381 {
1382 /*
1383 * Add the chunk.
1384 *
1385 * This must be done before the unwind init so windows can allocate
1386 * memory from the chunk when using the alternative sub-allocator.
1387 */
1388 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1389#ifdef IN_RING3
1390 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1391#endif
1392#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1393 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1394#else
1395 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1396 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1397 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1398 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1399#endif
1400
1401 pExecMemAllocator->cChunks = idxChunk + 1;
1402 pExecMemAllocator->idxChunkHint = idxChunk;
1403
1404#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1405 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1406 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1407#else
1408 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1409 pExecMemAllocator->cbTotal += cbFree;
1410 pExecMemAllocator->cbFree += cbFree;
1411#endif
1412
1413#ifdef IN_RING3
1414 /*
1415 * Initialize the unwind information (this cannot really fail atm).
1416 * (This sets pvUnwindInfo.)
1417 */
1418 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1419 if (RT_SUCCESS(rc))
1420#endif
1421 {
1422 return VINF_SUCCESS;
1423 }
1424
1425#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1426 /* Just in case the impossible happens, undo the above: */
1427 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1428 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1429 pExecMemAllocator->cChunks = idxChunk;
1430 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1431 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1432 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1433 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1434#endif
1435 }
1436#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1437 }
1438#endif
1439 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1440 RT_NOREF(pVCpu);
1441 return rc;
1442}
1443
1444
1445/**
1446 * Initializes the executable memory allocator for native recompilation on the
1447 * calling EMT.
1448 *
1449 * @returns VBox status code.
1450 * @param pVCpu The cross context virtual CPU structure of the calling
1451 * thread.
1452 * @param cbMax The max size of the allocator.
1453 * @param cbInitial The initial allocator size.
1454 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1455 * dependent).
1456 */
1457int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1458{
1459 /*
1460 * Validate input.
1461 */
1462 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1463 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1464 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1465 || cbChunk == 0
1466 || ( RT_IS_POWER_OF_TWO(cbChunk)
1467 && cbChunk >= _1M
1468 && cbChunk <= _256M
1469 && cbChunk <= cbMax),
1470 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1471 VERR_OUT_OF_RANGE);
1472
1473 /*
1474 * Adjust/figure out the chunk size.
1475 */
1476 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1477 {
1478 if (cbMax >= _256M)
1479 cbChunk = _64M;
1480 else
1481 {
1482 if (cbMax < _16M)
1483 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1484 else
1485 cbChunk = (uint32_t)cbMax / 4;
1486 if (!RT_IS_POWER_OF_TWO(cbChunk))
1487 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1488 }
1489 }
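    /* Illustrative example (not from the original source): with cbMax = 40 MiB
     * and cbChunk left at the default, cbChunk becomes 40 MiB / 4 = 10 MiB and
     * is rounded up to the 16 MiB power of two; the cbMax rounding just below
     * then yields 48 MiB, i.e. cMaxChunks = 3. */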
1490
1491 if (cbChunk > cbMax)
1492 cbMax = cbChunk;
1493 else
1494 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1495 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1496 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1497
1498 /*
1499 * Allocate and initialize the allocator instance.
1500 */
1501 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1502#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1503 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1504 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1505 cbNeeded += cbBitmap * cMaxChunks;
1506 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1507 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1508#endif
1509#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1510 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1511 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1512#endif
1513 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1514 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1515 VERR_NO_MEMORY);
1516 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1517 pExecMemAllocator->cbChunk = cbChunk;
1518 pExecMemAllocator->cMaxChunks = cMaxChunks;
1519 pExecMemAllocator->cChunks = 0;
1520 pExecMemAllocator->idxChunkHint = 0;
1521 pExecMemAllocator->cAllocations = 0;
1522 pExecMemAllocator->cbTotal = 0;
1523 pExecMemAllocator->cbFree = 0;
1524 pExecMemAllocator->cbAllocated = 0;
1525#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1526 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1527 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1528 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1529 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1530#endif
1531#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1532 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1533#endif
1534 for (uint32_t i = 0; i < cMaxChunks; i++)
1535 {
1536#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1537 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1538 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1539#else
1540 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1541#endif
1542 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1543#ifdef IN_RING0
1544 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1545#else
1546 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1547#endif
1548 }
1549 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1550
1551 /*
1552 * Do the initial allocations.
1553 */
1554    while (cbInitial > (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1555 {
1556 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1557 AssertLogRelRCReturn(rc, rc);
1558 }
1559
1560 pExecMemAllocator->idxChunkHint = 0;
1561
1562 return VINF_SUCCESS;
1563}
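
/*
 * Illustrative usage sketch (added for clarity, not part of the original source):
 * with the defaults above, a hypothetical call such as
 *
 *      int rc = iemExecMemAllocatorInit(pVCpu, _64M, _16M, 0); // 0 = pick the default chunk size
 *      AssertRCReturn(rc, rc);
 *
 * would end up with cbChunk = _16M (cbMax / 4, already a power of two),
 * cMaxChunks = 4, and a single 16 MB chunk allocated up front by the
 * initial-allocation loop.
 */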
1564
1565
1566/*********************************************************************************************************************************
1567* Native Recompilation *
1568*********************************************************************************************************************************/
1569
1570
1571/**
1572 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1573 */
1574IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1575{
1576 pVCpu->iem.s.cInstructions += idxInstr;
1577 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1578}
1579
1580
1581/**
1582 * Used by TB code when it wants to raise a \#DE.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1585{
1586 iemRaiseDivideErrorJmp(pVCpu);
1587#ifndef _MSC_VER
1588 return VINF_IEM_RAISED_XCPT; /* not reached */
1589#endif
1590}
1591
1592
1593/**
1594 * Used by TB code when it wants to raise a \#UD.
1595 */
1596IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1597{
1598 iemRaiseUndefinedOpcodeJmp(pVCpu);
1599#ifndef _MSC_VER
1600 return VINF_IEM_RAISED_XCPT; /* not reached */
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1607 *
1608 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1609 */
1610IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1611{
1612 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1613 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1614 iemRaiseUndefinedOpcodeJmp(pVCpu);
1615 else
1616 iemRaiseDeviceNotAvailableJmp(pVCpu);
1617#ifndef _MSC_VER
1618 return VINF_IEM_RAISED_XCPT; /* not reached */
1619#endif
1620}
1621
1622
1623/**
1624 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1625 *
1626 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1629{
1630 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1631 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1632 iemRaiseUndefinedOpcodeJmp(pVCpu);
1633 else
1634 iemRaiseDeviceNotAvailableJmp(pVCpu);
1635#ifndef _MSC_VER
1636 return VINF_IEM_RAISED_XCPT; /* not reached */
1637#endif
1638}
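
/* Note (added for clarity, inferred from the checks above): these SSE/AVX raise
   helpers are only reached once the recompiled IEM_MC_MAYBE_RAISE_*_RELATED_XCPT
   checks have decided that an exception is due; the helpers merely pick between
   raising #UD and #NM based on the (extended) control register state. */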
1639
1640
1641/**
1642 * Used by TB code when it wants to raise a \#NM.
1643 */
1644IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1645{
1646 iemRaiseDeviceNotAvailableJmp(pVCpu);
1647#ifndef _MSC_VER
1648 return VINF_IEM_RAISED_XCPT; /* not reached */
1649#endif
1650}
1651
1652
1653/**
1654 * Used by TB code when it wants to raise a \#GP(0).
1655 */
1656IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1657{
1658 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1659#ifndef _MSC_VER
1660 return VINF_IEM_RAISED_XCPT; /* not reached */
1661#endif
1662}
1663
1664
1665/**
1666 * Used by TB code when it wants to raise a \#MF.
1667 */
1668IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1669{
1670 iemRaiseMathFaultJmp(pVCpu);
1671#ifndef _MSC_VER
1672 return VINF_IEM_RAISED_XCPT; /* not reached */
1673#endif
1674}
1675
1676
1677/**
1678 * Used by TB code when it wants to raise a \#XF.
1679 */
1680IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1681{
1682 iemRaiseSimdFpExceptionJmp(pVCpu);
1683#ifndef _MSC_VER
1684 return VINF_IEM_RAISED_XCPT; /* not reached */
1685#endif
1686}
1687
1688
1689/**
1690 * Used by TB code when detecting opcode changes.
1691 * @see iemThreadeFuncWorkerObsoleteTb
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1694{
1695    /* We set fSafeToFree to false because we're being called in the context
1696       of a TB callback function, which for native TBs means we cannot release
1697       the executable memory until we've returned all the way back to iemTbExec,
1698       as that return path goes via the native code generated for the TB. */
1699 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1700 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1701 return VINF_IEM_REEXEC_BREAK;
1702}
1703
1704
1705/**
1706 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1707 */
1708IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1709{
1710 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1711 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1712 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1713 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1714 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1715 return VINF_IEM_REEXEC_BREAK;
1716}
1717
1718
1719/**
1720 * Used by TB code when we missed a PC check after a branch.
1721 */
1722IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1723{
1724 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1725 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1726 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1727 pVCpu->iem.s.pbInstrBuf));
1728 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1729 return VINF_IEM_REEXEC_BREAK;
1730}
1731
1732
1733
1734/*********************************************************************************************************************************
1735* Helpers: Segmented memory fetches and stores. *
1736*********************************************************************************************************************************/
1737
1738/**
1739 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1740 */
1741IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1742{
1743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1744 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1745#else
1746 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1747#endif
1748}
1749
1750
1751/**
1752 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1753 * to 16 bits.
1754 */
1755IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1756{
1757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1758 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1759#else
1760 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1761#endif
1762}
1763
1764
1765/**
1766 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1767 * to 32 bits.
1768 */
1769IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1770{
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1772 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1773#else
1774 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1775#endif
1776}
1777
1778/**
1779 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1780 * to 64 bits.
1781 */
1782IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1783{
1784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1785 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1786#else
1787 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1788#endif
1789}
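
/* Worked example (added for illustration): for a memory byte with the value 0x80
   the cast chains above yield 0x000000000000FF80 for the _Sx_U16 variant (sign
   extend to 16 bits, then zero extend to 64 bits), 0x00000000FFFFFF80 for the
   _Sx_U32 variant, and 0xFFFFFFFFFFFFFF80 for the _Sx_U64 variant. */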
1790
1791
1792/**
1793 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1794 */
1795IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1796{
1797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1798 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1799#else
1800 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1801#endif
1802}
1803
1804
1805/**
1806 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1807 * to 32 bits.
1808 */
1809IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1810{
1811#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1812 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1813#else
1814 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1815#endif
1816}
1817
1818
1819/**
1820 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1821 * to 64 bits.
1822 */
1823IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1824{
1825#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1826 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1827#else
1828 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1829#endif
1830}
1831
1832
1833/**
1834 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1835 */
1836IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1837{
1838#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1839 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1840#else
1841 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1842#endif
1843}
1844
1845
1846/**
1847 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1848 * to 64 bits.
1849 */
1850IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1851{
1852#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1853 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1854#else
1855 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1856#endif
1857}
1858
1859
1860/**
1861 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1862 */
1863IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1864{
1865#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1866 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1867#else
1868 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1869#endif
1870}
1871
1872
1873#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1874/**
1875 * Used by TB code to load 128-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1880 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1881#else
1882 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1883#endif
1884}
1885
1886
1887/**
1888 * Used by TB code to load 128-bit data w/ segmentation, enforcing the SSE alignment rules.
1889 */
1890IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1891{
1892#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1893 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1894#else
1895 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1896#endif
1897}
1898
1899
1900/**
1901 * Used by TB code to load 128-bit data w/ segmentation, skipping the alignment check.
1902 */
1903IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1904{
1905#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1906 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1907#else
1908 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1909#endif
1910}
1911
1912
1913/**
1914 * Used by TB code to load 256-bit data w/ segmentation, skipping the alignment check.
1915 */
1916IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1917{
1918#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1919 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1920#else
1921 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1922#endif
1923}
1924#endif
1925
1926
1927/**
1928 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1929 */
1930IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1931{
1932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1933 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1934#else
1935 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1936#endif
1937}
1938
1939
1940/**
1941 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1942 */
1943IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1944{
1945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1946 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1947#else
1948 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1949#endif
1950}
1951
1952
1953/**
1954 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1955 */
1956IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1957{
1958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1959 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1960#else
1961 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1962#endif
1963}
1964
1965
1966/**
1967 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1968 */
1969IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1970{
1971#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1972 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1973#else
1974 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1975#endif
1976}
1977
1978
1979#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1980/**
1981 * Used by TB code to store unsigned 128-bit data w/ segmentation, enforcing the SSE alignment rules.
1982 */
1983IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
1984{
1985#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1986 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
1987#else
1988 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
1989#endif
1990}
1991
1992
1993/**
1994 * Used by TB code to store unsigned 128-bit data w/ segmentation, skipping the alignment check.
1995 */
1996IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
1997{
1998#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1999 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2000#else
2001 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
2002#endif
2003}
2004
2005
2006/**
2007 * Used by TB code to store unsigned 256-bit data w/ segmentation, skipping the alignment check.
2008 */
2009IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
2010{
2011#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2012 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2013#else
2014 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
2015#endif
2016}
2017#endif
2018
2019
2020
2021/**
2022 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2025{
2026#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2027 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2028#else
2029 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2030#endif
2031}
2032
2033
2034/**
2035 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
2036 */
2037IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2038{
2039#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2040 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2041#else
2042 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2043#endif
2044}
2045
2046
2047/**
2048 * Used by TB code to store a 32-bit selector value onto a generic stack.
2049 *
2050 * Intel CPUs don't write the whole dword, thus the special function.
2051 */
2052IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2053{
2054#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2055 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2056#else
2057 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2058#endif
2059}
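
/* Note (added for clarity): when pushing a segment register with a 32-bit operand
   size, recent Intel CPUs only write the low 16 bits of the stack slot and leave
   the upper half untouched, which is why selector stores get this dedicated
   helper instead of the plain U32 one. */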
2060
2061
2062/**
2063 * Used by TB code to push unsigned 64-bit value onto a generic stack.
2064 */
2065IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2066{
2067#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2068 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2069#else
2070 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2071#endif
2072}
2073
2074
2075/**
2076 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2081 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2082#else
2083 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2090 */
2091IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2092{
2093#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2094 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2095#else
2096 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2097#endif
2098}
2099
2100
2101/**
2102 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2103 */
2104IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2105{
2106#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2107 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2108#else
2109 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2110#endif
2111}
2112
2113
2114
2115/*********************************************************************************************************************************
2116* Helpers: Flat memory fetches and stores. *
2117*********************************************************************************************************************************/
2118
2119/**
2120 * Used by TB code to load unsigned 8-bit data w/ flat address.
2121 * @note Zero extending the value to 64-bit to simplify assembly.
2122 */
2123IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2124{
2125#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2126 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2127#else
2128 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2129#endif
2130}
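
/* Note (added for clarity): the flat helpers reuse the segmented TLB workers by
   passing UINT8_MAX as the segment register index, which IEM treats as "no
   segmentation / flat addressing"; the same convention applies to all the flat
   fetch, store and map helpers below. */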
2131
2132
2133/**
2134 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2135 * to 16 bits.
2136 * @note Zero extending the value to 64-bit to simplify assembly.
2137 */
2138IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2139{
2140#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2141 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2142#else
2143 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2144#endif
2145}
2146
2147
2148/**
2149 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2150 * to 32 bits.
2151 * @note Zero extending the value to 64-bit to simplify assembly.
2152 */
2153IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2154{
2155#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2156 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2157#else
2158 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2159#endif
2160}
2161
2162
2163/**
2164 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2165 * to 64 bits.
2166 */
2167IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2168{
2169#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2170 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2171#else
2172 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2173#endif
2174}
2175
2176
2177/**
2178 * Used by TB code to load unsigned 16-bit data w/ flat address.
2179 * @note Zero extending the value to 64-bit to simplify assembly.
2180 */
2181IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2182{
2183#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2184 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2185#else
2186 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2187#endif
2188}
2189
2190
2191/**
2192 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2193 * to 32 bits.
2194 * @note Zero extending the value to 64-bit to simplify assembly.
2195 */
2196IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2197{
2198#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2199 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2200#else
2201 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2202#endif
2203}
2204
2205
2206/**
2207 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2208 * to 64 bits.
2209 * @note Zero extending the value to 64-bit to simplify assembly.
2210 */
2211IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2212{
2213#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2214 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2215#else
2216 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2217#endif
2218}
2219
2220
2221/**
2222 * Used by TB code to load unsigned 32-bit data w/ flat address.
2223 * @note Zero extending the value to 64-bit to simplify assembly.
2224 */
2225IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2226{
2227#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2228 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2229#else
2230 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2231#endif
2232}
2233
2234
2235/**
2236 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2237 * to 64 bits.
2238 * @note Zero extending the value to 64-bit to simplify assembly.
2239 */
2240IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2241{
2242#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2243 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2244#else
2245 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2246#endif
2247}
2248
2249
2250/**
2251 * Used by TB code to load unsigned 64-bit data w/ flat address.
2252 */
2253IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2254{
2255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2256 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2257#else
2258 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2259#endif
2260}
2261
2262
2263#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2264/**
2265 * Used by TB code to load unsigned 128-bit data w/ flat address.
2266 */
2267IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2268{
2269#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2270 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2271#else
2272 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2273#endif
2274}
2275
2276
2277/**
2278 * Used by TB code to load unsigned 128-bit data w/ flat address, enforcing the SSE alignment rules.
2279 */
2280IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2281{
2282#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2283 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2284#else
2285 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2286#endif
2287}
2288
2289
2290/**
2291 * Used by TB code to load unsigned 128-bit data w/ flat address, skipping the alignment check.
2292 */
2293IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2294{
2295#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2296 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2297#else
2298 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2299#endif
2300}
2301
2302
2303/**
2304 * Used by TB code to load unsigned 256-bit data w/ flat address, skipping the alignment check.
2305 */
2306IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2307{
2308#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2309 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2310#else
2311 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2312#endif
2313}
2314#endif
2315
2316
2317/**
2318 * Used by TB code to store unsigned 8-bit data w/ flat address.
2319 */
2320IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2321{
2322#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2323 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2324#else
2325 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2326#endif
2327}
2328
2329
2330/**
2331 * Used by TB code to store unsigned 16-bit data w/ flat address.
2332 */
2333IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2336 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2337#else
2338 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2339#endif
2340}
2341
2342
2343/**
2344 * Used by TB code to store unsigned 32-bit data w/ flat address.
2345 */
2346IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2347{
2348#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2349 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2350#else
2351 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2352#endif
2353}
2354
2355
2356/**
2357 * Used by TB code to store unsigned 64-bit data w/ flat address.
2358 */
2359IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2360{
2361#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2362 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2363#else
2364 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2365#endif
2366}
2367
2368
2369#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2370/**
2371 * Used by TB code to store unsigned 128-bit data w/ flat address, enforcing the SSE alignment rules.
2372 */
2373IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2374{
2375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2376 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2377#else
2378 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2379#endif
2380}
2381
2382
2383/**
2384 * Used by TB code to store unsigned 128-bit data w/ flat address, skipping the alignment check.
2385 */
2386IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2387{
2388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2389 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2390#else
2391 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2392#endif
2393}
2394
2395
2396/**
2397 * Used by TB code to store unsigned 256-bit data w/ flat address, skipping the alignment check.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2400{
2401#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2402 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2403#else
2404 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
2405#endif
2406}
2407#endif
2408
2409
2410
2411/**
2412 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2413 */
2414IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2417 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2418#else
2419 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2426 */
2427IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2428{
2429#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2430 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2431#else
2432 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2433#endif
2434}
2435
2436
2437/**
2438 * Used by TB code to store a segment selector value onto a flat stack.
2439 *
2440 * Intel CPUs don't write the whole dword, thus the special function.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2445 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2446#else
2447 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2454 */
2455IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2456{
2457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2458 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2459#else
2460 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2461#endif
2462}
2463
2464
2465/**
2466 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2467 */
2468IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2469{
2470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2471 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2472#else
2473 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2474#endif
2475}
2476
2477
2478/**
2479 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2480 */
2481IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2482{
2483#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2484 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2485#else
2486 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2487#endif
2488}
2489
2490
2491/**
2492 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2493 */
2494IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2495{
2496#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2497 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2498#else
2499 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2500#endif
2501}
2502
2503
2504
2505/*********************************************************************************************************************************
2506* Helpers: Segmented memory mapping. *
2507*********************************************************************************************************************************/
2508
2509/**
2510 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2511 * segmentation.
2512 */
2513IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2514 RTGCPTR GCPtrMem, uint8_t iSegReg))
2515{
2516#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2517 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2518#else
2519 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2520#endif
2521}
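
/* Note (added for clarity): each map helper returns a host pointer to the guest
   data and fills in *pbUnmapInfo; TB code must later pass that byte to the
   matching iemNativeHlpMemCommitAndUnmap* helper (see further down) to commit
   the access and release the mapping. */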
2522
2523
2524/**
2525 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2526 */
2527IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2528 RTGCPTR GCPtrMem, uint8_t iSegReg))
2529{
2530#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2531 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2532#else
2533 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2534#endif
2535}
2536
2537
2538/**
2539 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/**
2567 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2568 * segmentation.
2569 */
2570IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2571 RTGCPTR GCPtrMem, uint8_t iSegReg))
2572{
2573#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2574 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2575#else
2576 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2577#endif
2578}
2579
2580
2581/**
2582 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2583 */
2584IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2585 RTGCPTR GCPtrMem, uint8_t iSegReg))
2586{
2587#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2588 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2589#else
2590 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2591#endif
2592}
2593
2594
2595/**
2596 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2597 */
2598IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2599 RTGCPTR GCPtrMem, uint8_t iSegReg))
2600{
2601#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2602 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2603#else
2604 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2605#endif
2606}
2607
2608
2609/**
2610 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2611 */
2612IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2613 RTGCPTR GCPtrMem, uint8_t iSegReg))
2614{
2615#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2616 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2617#else
2618 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2619#endif
2620}
2621
2622
2623/**
2624 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2625 * segmentation.
2626 */
2627IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2628 RTGCPTR GCPtrMem, uint8_t iSegReg))
2629{
2630#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2631 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2632#else
2633 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2634#endif
2635}
2636
2637
2638/**
2639 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2640 */
2641IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2642 RTGCPTR GCPtrMem, uint8_t iSegReg))
2643{
2644#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2645 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2646#else
2647 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2648#endif
2649}
2650
2651
2652/**
2653 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2654 */
2655IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2656 RTGCPTR GCPtrMem, uint8_t iSegReg))
2657{
2658#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2659 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2660#else
2661 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2662#endif
2663}
2664
2665
2666/**
2667 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2668 */
2669IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2670 RTGCPTR GCPtrMem, uint8_t iSegReg))
2671{
2672#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2673 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2674#else
2675 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2676#endif
2677}
2678
2679
2680/**
2681 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2682 * segmentation.
2683 */
2684IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2685 RTGCPTR GCPtrMem, uint8_t iSegReg))
2686{
2687#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2688 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2689#else
2690 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2691#endif
2692}
2693
2694
2695/**
2696 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2697 */
2698IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2699 RTGCPTR GCPtrMem, uint8_t iSegReg))
2700{
2701#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2702 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2703#else
2704 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2705#endif
2706}
2707
2708
2709/**
2710 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2711 */
2712IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2713 RTGCPTR GCPtrMem, uint8_t iSegReg))
2714{
2715#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2716 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2717#else
2718 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2719#endif
2720}
2721
2722
2723/**
2724 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2725 */
2726IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2727 RTGCPTR GCPtrMem, uint8_t iSegReg))
2728{
2729#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2730 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2731#else
2732 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2733#endif
2734}
2735
2736
2737/**
2738 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2739 */
2740IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2741 RTGCPTR GCPtrMem, uint8_t iSegReg))
2742{
2743#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2744 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2745#else
2746 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2747#endif
2748}
2749
2750
2751/**
2752 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2753 */
2754IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2755 RTGCPTR GCPtrMem, uint8_t iSegReg))
2756{
2757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2758 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2759#else
2760 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2761#endif
2762}
2763
2764
2765/**
2766 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2767 * segmentation.
2768 */
2769IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2770 RTGCPTR GCPtrMem, uint8_t iSegReg))
2771{
2772#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2773 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2774#else
2775 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2776#endif
2777}
2778
2779
2780/**
2781 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2782 */
2783IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2784 RTGCPTR GCPtrMem, uint8_t iSegReg))
2785{
2786#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2787 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2788#else
2789 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2790#endif
2791}
2792
2793
2794/**
2795 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2796 */
2797IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2798 RTGCPTR GCPtrMem, uint8_t iSegReg))
2799{
2800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2801 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2802#else
2803 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2804#endif
2805}
2806
2807
2808/**
2809 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2810 */
2811IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2812 RTGCPTR GCPtrMem, uint8_t iSegReg))
2813{
2814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2815 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2816#else
2817 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2818#endif
2819}
2820
2821
2822/*********************************************************************************************************************************
2823* Helpers: Flat memory mapping. *
2824*********************************************************************************************************************************/
2825
2826/**
2827 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2828 * address.
2829 */
2830IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2831{
2832#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2833 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2834#else
2835 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2836#endif
2837}
2838
2839
2840/**
2841 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2842 */
2843IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2844{
2845#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2846 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2847#else
2848 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2849#endif
2850}
2851
2852
2853/**
2854 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2855 */
2856IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2857{
2858#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2859 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2860#else
2861 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2862#endif
2863}
2864
2865
2866/**
2867 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2868 */
2869IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2870{
2871#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2872 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2873#else
2874 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2875#endif
2876}
2877
2878
2879/**
2880 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2881 * address.
2882 */
2883IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2884{
2885#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2886 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2887#else
2888 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2889#endif
2890}
2891
2892
2893/**
2894 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2895 */
2896IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2897{
2898#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2899 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2900#else
2901 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2902#endif
2903}
2904
2905
2906/**
2907 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2908 */
2909IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2910{
2911#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2912 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2913#else
2914 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2915#endif
2916}
2917
2918
2919/**
2920 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2921 */
2922IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2923{
2924#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2925 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2926#else
2927 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2928#endif
2929}
2930
2931
2932/**
2933 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2934 * address.
2935 */
2936IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2937{
2938#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2939 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2940#else
2941 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2942#endif
2943}
2944
2945
2946/**
2947 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2948 */
2949IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2950{
2951#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2952 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2953#else
2954 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2955#endif
2956}
2957
2958
2959/**
2960 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2961 */
2962IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2963{
2964#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2965 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2966#else
2967 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2968#endif
2969}
2970
2971
2972/**
2973 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2974 */
2975IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2976{
2977#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2978 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2979#else
2980 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2981#endif
2982}
2983
2984
2985/**
2986 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2987 * address.
2988 */
2989IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2990{
2991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2992 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2993#else
2994 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2995#endif
2996}
2997
2998
2999/**
3000 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
3001 */
3002IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3003{
3004#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3005 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3006#else
3007 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3008#endif
3009}
3010
3011
3012/**
3013 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
3014 */
3015IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3016{
3017#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3018 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3019#else
3020 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3021#endif
3022}
3023
3024
3025/**
3026 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3027 */
3028IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3029{
3030#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3031 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3032#else
3033 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3034#endif
3035}
3036
3037
3038/**
3039 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3040 */
3041IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3042{
3043#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3044 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3045#else
3046 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3047#endif
3048}
3049
3050
3051/**
3052 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3053 */
3054IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3055{
3056#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3057 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3058#else
3059 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3060#endif
3061}
3062
3063
3064/**
3065 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3066 * address.
3067 */
3068IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3069{
3070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3071 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3072#else
3073 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3074#endif
3075}
3076
3077
3078/**
3079 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3080 */
3081IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3082{
3083#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3084 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3085#else
3086 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3087#endif
3088}
3089
3090
3091/**
3092 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3093 */
3094IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3095{
3096#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3097 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3098#else
3099 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3100#endif
3101}
3102
3103
3104/**
3105 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3106 */
3107IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3108{
3109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3110 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3111#else
3112 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3113#endif
3114}
3115
3116
3117/*********************************************************************************************************************************
3118* Helpers: Commit, rollback & unmap *
3119*********************************************************************************************************************************/
3120
3121/**
3122 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3123 */
3124IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3125{
3126 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3127}
3128
3129
3130/**
3131 * Used by TB code to commit and unmap a read-write memory mapping.
3132 */
3133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3134{
3135 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3136}
3137
3138
3139/**
3140 * Used by TB code to commit and unmap a write-only memory mapping.
3141 */
3142IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3143{
3144 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3145}
3146
3147
3148/**
3149 * Used by TB code to commit and unmap a read-only memory mapping.
3150 */
3151IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3152{
3153 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3154}
3155
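#if 0
/* Illustrative sketch (not built): the lifecycle the recompiled TB code goes through for
   a guest memory access - map, access the returned host pointer, then commit and unmap.
   It is written as a plain C function here purely for illustration; real TBs emit these
   helper calls inline.  Uses the read-only U64 helpers defined above. */
static uint64_t iemNativeExampleMapReadUnmap(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    uint8_t         bUnmapInfo = 0;
    uint64_t const *pu64Src    = iemNativeHlpMemFlatMapDataU64Ro(pVCpu, &bUnmapInfo, GCPtrMem);
    uint64_t const  uValue     = *pu64Src;                  /* use the mapping */
    iemNativeHlpMemCommitAndUnmapRo(pVCpu, bUnmapInfo);     /* release it again */
    return uValue;
}
#endif
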
3156
3157/**
3158 * Reinitializes the native recompiler state.
3159 *
3160 * Called before starting a new recompile job.
3161 */
3162static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3163{
3164 pReNative->cLabels = 0;
3165 pReNative->bmLabelTypes = 0;
3166 pReNative->cFixups = 0;
3167#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3168 pReNative->pDbgInfo->cEntries = 0;
3169#endif
3170 pReNative->pTbOrg = pTb;
3171 pReNative->cCondDepth = 0;
3172 pReNative->uCondSeqNo = 0;
3173 pReNative->uCheckIrqSeqNo = 0;
3174 pReNative->uTlbSeqNo = 0;
3175
3176#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3177 pReNative->Core.offPc = 0;
3178 pReNative->Core.cInstrPcUpdateSkipped = 0;
3179#endif
3180#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3181 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3182#endif
3183 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3184#if IEMNATIVE_HST_GREG_COUNT < 32
3185 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3186#endif
3187 ;
3188 pReNative->Core.bmHstRegsWithGstShadow = 0;
3189 pReNative->Core.bmGstRegShadows = 0;
3190 pReNative->Core.bmVars = 0;
3191 pReNative->Core.bmStack = 0;
3192 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3193 pReNative->Core.u64ArgVars = UINT64_MAX;
3194
3195 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 16);
3196 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3197 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3198 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3199 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3200 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3201 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3202 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3203 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3204 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3205 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3206 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3207 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3208 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3209 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3210 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3211 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3212
3213 /* Full host register reinit: */
3214 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3215 {
3216 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3217 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3218 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3219 }
3220
3221 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3222 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3223#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3224 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3225#endif
3226#ifdef IEMNATIVE_REG_FIXED_TMP0
3227 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3228#endif
3229#ifdef IEMNATIVE_REG_FIXED_TMP1
3230 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3231#endif
3232#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3233 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3234#endif
3235 );
3236 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3237 {
3238 fRegs &= ~RT_BIT_32(idxReg);
3239 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3240 }
3241
3242 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3243#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3244 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3245#endif
3246#ifdef IEMNATIVE_REG_FIXED_TMP0
3247 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3248#endif
3249#ifdef IEMNATIVE_REG_FIXED_TMP1
3250 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3251#endif
3252#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3253 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3254#endif
3255
3256#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3257# ifdef RT_ARCH_ARM64
3258 /*
3259 * Arm64 has only 32 128-bit registers. In order to support emulating 256-bit registers, we statically pair
3260 * two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3261 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register as fixed here during init
3262 * and the register allocator assumes that it will always be free when the lower one is picked.
3263 */
3264 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3265# else
3266 uint32_t const fFixedAdditional = 0;
3267# endif
3268
3269 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3270 | fFixedAdditional
3271# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3272 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3273# endif
3274 ;
3275 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3276 pReNative->Core.bmGstSimdRegShadows = 0;
3277 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3278 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3279
3280 /* Full host SIMD register reinit: */
3281 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3282 {
3283 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3284 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3285 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3286 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3287 }
3288
3289 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3290 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3291 {
3292 fRegs &= ~RT_BIT_32(idxReg);
3293 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3294 }
3295
3296#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3297 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3298#endif
3299
3300#endif
3301
3302 return pReNative;
3303}
3304
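#if 0
/* Illustrative sketch (not built): the effect of the bmHstRegs initialization above on a
   hypothetical host with 16 GPRs where registers 4 and 5 are fixed.  Everything in the
   fixed mask and every bit at or above the host register count starts out set, so the
   allocator treats those registers as permanently taken.  The two constants below are
   stand-ins for IEMNATIVE_REG_FIXED_MASK and IEMNATIVE_HST_GREG_COUNT. */
static uint32_t iemNativeExampleInitialHstRegBitmap(void)
{
    uint32_t const fFixedMask = RT_BIT_32(4) | RT_BIT_32(5);
    uint32_t const cHstGRegs  = 16;
    uint32_t       fBmHstRegs = fFixedMask;
    if (cHstGRegs < 32)
        fBmHstRegs |= ~(RT_BIT_32(cHstGRegs) - 1U);     /* bits 16..31 do not name real registers */
    return fBmHstRegs;                                  /* 0xffff0030 in this example */
}
#endif
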
3305
3306/**
3307 * Allocates and initializes the native recompiler state.
3308 *
3309 * This is called the first time an EMT wants to recompile something.
3310 *
3311 * @returns Pointer to the new recompiler state.
3312 * @param pVCpu The cross context virtual CPU structure of the calling
3313 * thread.
3314 * @param pTb The TB that's about to be recompiled.
3315 * @thread EMT(pVCpu)
3316 */
3317static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3318{
3319 VMCPU_ASSERT_EMT(pVCpu);
3320
3321 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3322 AssertReturn(pReNative, NULL);
3323
3324 /*
3325 * Try to allocate all the buffers and stuff we need.
3326 */
3327 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3328 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3329 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3330#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3331 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3332#endif
3333 if (RT_LIKELY( pReNative->pInstrBuf
3334 && pReNative->paLabels
3335 && pReNative->paFixups)
3336#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3337 && pReNative->pDbgInfo
3338#endif
3339 )
3340 {
3341 /*
3342 * Set the buffer & array sizes on success.
3343 */
3344 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3345 pReNative->cLabelsAlloc = _8K;
3346 pReNative->cFixupsAlloc = _16K;
3347#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3348 pReNative->cDbgInfoAlloc = _16K;
3349#endif
3350
3351 /* Other constant stuff: */
3352 pReNative->pVCpu = pVCpu;
3353
3354 /*
3355 * Done, just need to save it and reinit it.
3356 */
3357 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3358 return iemNativeReInit(pReNative, pTb);
3359 }
3360
3361 /*
3362 * Failed. Cleanup and return.
3363 */
3364 AssertFailed();
3365 RTMemFree(pReNative->pInstrBuf);
3366 RTMemFree(pReNative->paLabels);
3367 RTMemFree(pReNative->paFixups);
3368#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3369 RTMemFree(pReNative->pDbgInfo);
3370#endif
3371 RTMemFree(pReNative);
3372 return NULL;
3373}
3374
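#if 0
/* Illustrative sketch (not built): how a caller is expected to obtain the recompiler
   state - allocate it lazily the first time, reinitialize it for every later TB.  This
   mirrors the pattern the recompiler entry point uses, but treat it as an assumption
   about the calling convention rather than a copy of that code. */
static PIEMRECOMPILERSTATE iemNativeExampleGetState(PVMCPUCC pVCpu, PCIEMTB pTb)
{
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (pReNative)
        return iemNativeReInit(pReNative, pTb);
    return iemNativeInit(pVCpu, pTb);   /* stores itself in pVCpu->iem.s.pNativeRecompilerStateR3 */
}
#endif
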
3375
3376/**
3377 * Creates a label.
3378 *
3379 * If the label does not yet have a defined position,
3380 * call iemNativeLabelDefine() later to set it.
3381 *
3382 * @returns Label ID. Throws VBox status code on failure, so no need to check
3383 * the return value.
3384 * @param pReNative The native recompile state.
3385 * @param enmType The label type.
3386 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3387 * label is not yet defined (default).
3388 * @param uData Data associated with the label. Only applicable to
3389 * certain types of labels. Default is zero.
3390 */
3391DECL_HIDDEN_THROW(uint32_t)
3392iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3393 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3394{
3395 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3396
3397 /*
3398 * Locate existing label definition.
3399 *
3400 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3401 * and uData is zero.
3402 */
3403 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3404 uint32_t const cLabels = pReNative->cLabels;
3405 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3406#ifndef VBOX_STRICT
3407 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3408 && offWhere == UINT32_MAX
3409 && uData == 0
3410#endif
3411 )
3412 {
3413#ifndef VBOX_STRICT
3414 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3415 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3416 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3417 if (idxLabel < pReNative->cLabels)
3418 return idxLabel;
3419#else
3420 for (uint32_t i = 0; i < cLabels; i++)
3421 if ( paLabels[i].enmType == enmType
3422 && paLabels[i].uData == uData)
3423 {
3424 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3425 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3426 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3427 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3428 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3429 return i;
3430 }
3431 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3432 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3433#endif
3434 }
3435
3436 /*
3437 * Make sure we've got room for another label.
3438 */
3439 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3440 { /* likely */ }
3441 else
3442 {
3443 uint32_t cNew = pReNative->cLabelsAlloc;
3444 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3445 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3446 cNew *= 2;
3447 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3448 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3449 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3450 pReNative->paLabels = paLabels;
3451 pReNative->cLabelsAlloc = cNew;
3452 }
3453
3454 /*
3455 * Define a new label.
3456 */
3457 paLabels[cLabels].off = offWhere;
3458 paLabels[cLabels].enmType = enmType;
3459 paLabels[cLabels].uData = uData;
3460 pReNative->cLabels = cLabels + 1;
3461
3462 Assert((unsigned)enmType < 64);
3463 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3464
3465 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3466 {
3467 Assert(uData == 0);
3468 pReNative->aidxUniqueLabels[enmType] = cLabels;
3469 }
3470
3471 if (offWhere != UINT32_MAX)
3472 {
3473#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3474 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3475 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3476#endif
3477 }
3478 return cLabels;
3479}
3480
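#if 0
/* Illustrative sketch (not built): creating a forward label, branching to it, and only
   then defining its position.  kIemNativeLabelType_If is used as an example of a
   multiple-instance label type keyed by uData, and iemNativeEmitJzToLabel() is a
   hypothetical emitter standing in for whichever branch emitter the caller uses; any
   such emitter records a fixup against idxLabel that is resolved once the label is
   defined. */
static uint32_t iemNativeExampleForwardLabel(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX, pReNative->uCondSeqNo);
    off = iemNativeEmitJzToLabel(pReNative, off, idxLabel);     /* hypothetical emitter; adds a fixup */
    /* ... emit the code that is skipped when the branch is taken ... */
    iemNativeLabelDefine(pReNative, idxLabel, off);             /* the fixup target is now known */
    return off;
}
#endif
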
3481
3482/**
3483 * Defines the location of an existing label.
3484 *
3485 * @param pReNative The native recompile state.
3486 * @param idxLabel The label to define.
3487 * @param offWhere The position.
3488 */
3489DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3490{
3491 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3492 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3493 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3494 pLabel->off = offWhere;
3495#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3496 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3497 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3498#endif
3499}
3500
3501
3502/**
3503 * Looks up a label.
3504 *
3505 * @returns Label ID if found, UINT32_MAX if not.
3506 */
3507static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3508 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3509{
3510 Assert((unsigned)enmType < 64);
3511 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3512 {
3513 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3514 return pReNative->aidxUniqueLabels[enmType];
3515
3516 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3517 uint32_t const cLabels = pReNative->cLabels;
3518 for (uint32_t i = 0; i < cLabels; i++)
3519 if ( paLabels[i].enmType == enmType
3520 && paLabels[i].uData == uData
3521 && ( paLabels[i].off == offWhere
3522 || offWhere == UINT32_MAX
3523 || paLabels[i].off == UINT32_MAX))
3524 return i;
3525 }
3526 return UINT32_MAX;
3527}
3528
3529
3530/**
3531 * Adds a fixup.
3532 *
3533 * @throws VBox status code (int) on failure.
3534 * @param pReNative The native recompile state.
3535 * @param offWhere The instruction offset of the fixup location.
3536 * @param idxLabel The target label ID for the fixup.
3537 * @param enmType The fixup type.
3538 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3539 */
3540DECL_HIDDEN_THROW(void)
3541iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3542 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3543{
3544 Assert(idxLabel <= UINT16_MAX);
3545 Assert((unsigned)enmType <= UINT8_MAX);
3546#ifdef RT_ARCH_ARM64
3547 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3548 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3549 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3550#endif
3551
3552 /*
3553 * Make sure we've got room.
3554 */
3555 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3556 uint32_t const cFixups = pReNative->cFixups;
3557 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3558 { /* likely */ }
3559 else
3560 {
3561 uint32_t cNew = pReNative->cFixupsAlloc;
3562 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3563 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3564 cNew *= 2;
3565 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3566 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3567 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3568 pReNative->paFixups = paFixups;
3569 pReNative->cFixupsAlloc = cNew;
3570 }
3571
3572 /*
3573 * Add the fixup.
3574 */
3575 paFixups[cFixups].off = offWhere;
3576 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3577 paFixups[cFixups].enmType = enmType;
3578 paFixups[cFixups].offAddend = offAddend;
3579 pReNative->cFixups = cFixups + 1;
3580}
3581
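#if 0
/* Illustrative sketch (not built): what the final assembly pass conceptually does with a
   fixup record - once every label has a defined position, the partially emitted branch
   at paFixups[i].off is patched to reach the label.  The displacement math below is
   deliberately simplified and does not match any specific IEMNATIVEFIXUPTYPE exactly. */
static void iemNativeExampleApplyFixup(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR paInstrs, uint32_t idxFixup)
{
    IEMNATIVEFIXUP const *pFixup = &pReNative->paFixups[idxFixup];
    IEMNATIVELABEL const *pLabel = &pReNative->paLabels[pFixup->idxLabel];
    Assert(pLabel->off != UINT32_MAX);  /* every label must be defined by the time fixups are applied */
    int32_t const offDisp = (int32_t)pLabel->off - (int32_t)pFixup->off + pFixup->offAddend;
    RT_NOREF(paInstrs, offDisp);        /* patch paInstrs[pFixup->off] with offDisp (architecture specific) */
}
#endif
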
3582
3583/**
3584 * Slow code path for iemNativeInstrBufEnsure.
3585 */
3586DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3587{
3588 /* Double the buffer size till we meet the request. */
3589 uint32_t cNew = pReNative->cInstrBufAlloc;
3590 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3591 do
3592 cNew *= 2;
3593 while (cNew < off + cInstrReq);
3594
3595 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3596#ifdef RT_ARCH_ARM64
3597 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3598#else
3599 uint32_t const cbMaxInstrBuf = _2M;
3600#endif
3601 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3602
3603 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3604 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3605
3606#ifdef VBOX_STRICT
3607 pReNative->offInstrBufChecked = off + cInstrReq;
3608#endif
3609 pReNative->cInstrBufAlloc = cNew;
3610 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3611}
3612
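#if 0
/* Illustrative sketch (not built): the growth behaviour of the doubling loop above.  A
   request that does not fit in the current allocation keeps doubling it until it does;
   the cbMaxInstrBuf check then rejects anything past the architecture limit. */
static uint32_t iemNativeExampleGrowthTarget(uint32_t cInstrBufAlloc, uint32_t off, uint32_t cInstrReq)
{
    uint32_t cNew = cInstrBufAlloc;
    do
        cNew *= 2;
    while (cNew < off + cInstrReq);
    return cNew;    /* e.g. 16K entries grow to 32K when off + cInstrReq is 20K */
}
#endif
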
3613#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3614
3615/**
3616 * Grows the static debug info array used during recompilation.
3617 *
3618 * @returns Pointer to the new debug info block; throws VBox status code on
3619 * failure, so no need to check the return value.
3620 */
3621DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3622{
3623 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3624 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3625 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3626 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3627 pReNative->pDbgInfo = pDbgInfo;
3628 pReNative->cDbgInfoAlloc = cNew;
3629 return pDbgInfo;
3630}
3631
3632
3633/**
3634 * Adds a new debug info uninitialized entry, returning the pointer to it.
3635 */
3636DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3637{
3638 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3639 { /* likely */ }
3640 else
3641 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3642 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3643}
3644
3645
3646/**
3647 * Debug Info: Adds a native offset record, if necessary.
3648 */
3649DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3650{
3651 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3652
3653 /*
3654 * Search backwards to see if we've got a similar record already.
3655 */
3656 uint32_t idx = pDbgInfo->cEntries;
3657 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3658 while (idx-- > idxStop)
3659 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3660 {
3661 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3662 return;
3663 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3664 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3665 break;
3666 }
3667
3668 /*
3669 * Add it.
3670 */
3671 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3672 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3673 pEntry->NativeOffset.offNative = off;
3674}
3675
3676
3677/**
3678 * Debug Info: Record info about a label.
3679 */
3680static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3681{
3682 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3683 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3684 pEntry->Label.uUnused = 0;
3685 pEntry->Label.enmLabel = (uint8_t)enmType;
3686 pEntry->Label.uData = uData;
3687}
3688
3689
3690/**
3691 * Debug Info: Record info about a threaded call.
3692 */
3693static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3694{
3695 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3696 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3697 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3698 pEntry->ThreadedCall.uUnused = 0;
3699 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3700}
3701
3702
3703/**
3704 * Debug Info: Record info about a new guest instruction.
3705 */
3706static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3707{
3708 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3709 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3710 pEntry->GuestInstruction.uUnused = 0;
3711 pEntry->GuestInstruction.fExec = fExec;
3712}
3713
3714
3715/**
3716 * Debug Info: Record info about guest register shadowing.
3717 */
3718DECL_HIDDEN_THROW(void)
3719iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3720 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3721{
3722 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3723 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3724 pEntry->GuestRegShadowing.uUnused = 0;
3725 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3726 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3727 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3728}
3729
3730
3731# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3732/**
3733 * Debug Info: Record info about guest register shadowing.
3734 */
3735DECL_HIDDEN_THROW(void)
3736iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3737 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3738{
3739 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3740 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3741 pEntry->GuestSimdRegShadowing.uUnused = 0;
3742 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3743 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3744 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3745}
3746# endif
3747
3748
3749# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3750/**
3751 * Debug Info: Record info about delayed RIP updates.
3752 */
3753DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3754{
3755 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3756 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3757 pEntry->DelayedPcUpdate.offPc = offPc;
3758 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3759}
3760# endif
3761
3762#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3763
3764
3765/*********************************************************************************************************************************
3766* Register Allocator *
3767*********************************************************************************************************************************/
3768
3769/**
3770 * Register parameter indexes (indexed by argument number).
3771 */
3772DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3773{
3774 IEMNATIVE_CALL_ARG0_GREG,
3775 IEMNATIVE_CALL_ARG1_GREG,
3776 IEMNATIVE_CALL_ARG2_GREG,
3777 IEMNATIVE_CALL_ARG3_GREG,
3778#if defined(IEMNATIVE_CALL_ARG4_GREG)
3779 IEMNATIVE_CALL_ARG4_GREG,
3780# if defined(IEMNATIVE_CALL_ARG5_GREG)
3781 IEMNATIVE_CALL_ARG5_GREG,
3782# if defined(IEMNATIVE_CALL_ARG6_GREG)
3783 IEMNATIVE_CALL_ARG6_GREG,
3784# if defined(IEMNATIVE_CALL_ARG7_GREG)
3785 IEMNATIVE_CALL_ARG7_GREG,
3786# endif
3787# endif
3788# endif
3789#endif
3790};
3791AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3792
3793/**
3794 * Call register masks indexed by argument count.
3795 */
3796DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3797{
3798 0,
3799 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3800 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3801 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3802 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3803 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3804#if defined(IEMNATIVE_CALL_ARG4_GREG)
3805 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3806 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3807# if defined(IEMNATIVE_CALL_ARG5_GREG)
3808 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3809 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3810# if defined(IEMNATIVE_CALL_ARG6_GREG)
3811 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3812 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3813 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3814# if defined(IEMNATIVE_CALL_ARG7_GREG)
3815 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3816 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3817 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3818# endif
3819# endif
3820# endif
3821#endif
3822};
3823
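#if 0
/* Illustrative sketch (not built): the relationship between the two tables above - the
   mask for N arguments is simply the OR of the first N entries of g_aidxIemNativeCallRegs. */
static void iemNativeExampleCheckCallRegTables(void)
{
    uint32_t fMask = 0;
    for (uint32_t cArgs = 0; cArgs < RT_ELEMENTS(g_afIemNativeCallRegs); cArgs++)
    {
        Assert(g_afIemNativeCallRegs[cArgs] == fMask);
        if (cArgs < RT_ELEMENTS(g_aidxIemNativeCallRegs))
            fMask |= RT_BIT_32(g_aidxIemNativeCallRegs[cArgs]);
    }
}
#endif
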
3824#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3825/**
3826 * BP offset of the stack argument slots.
3827 *
3828 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3829 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3830 */
3831DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3832{
3833 IEMNATIVE_FP_OFF_STACK_ARG0,
3834# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3835 IEMNATIVE_FP_OFF_STACK_ARG1,
3836# endif
3837# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3838 IEMNATIVE_FP_OFF_STACK_ARG2,
3839# endif
3840# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3841 IEMNATIVE_FP_OFF_STACK_ARG3,
3842# endif
3843};
3844AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3845#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3846
3847/**
3848 * Info about shadowed guest register values.
3849 * @see IEMNATIVEGSTREG
3850 */
3851DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3852{
3853#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3854 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3855 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3856 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3857 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3858 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3859 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3860 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3861 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3862 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3863 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3864 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3865 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3866 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3867 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3868 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3869 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3870 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3871 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3872 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3873 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3874 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3875 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3876 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3877 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3878 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3879 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3880 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3881 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3882 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3883 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3884 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3885 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3886 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3887 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3888 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3889 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3890 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3891 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3892 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3893 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3894 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3895 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3896 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3897 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3898 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3899 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3900 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3901 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3902#undef CPUMCTX_OFF_AND_SIZE
3903};
3904AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3905
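#if 0
/* Illustrative sketch (not built): how the shadow info table above is meant to be
   consumed - the recorded offset locates the guest register inside VMCPU, so an emitter
   can load the shadow copy straight from there.  The 'off' member name and the
   iemNativeEmitLoadGprFromVCpuU64() emitter are assumptions here; narrower fields would
   need the width-matching load variant. */
static uint32_t iemNativeExampleLoadGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off,
                                                 uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
{
    Assert(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t));
    return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
}
#endif
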
3906
3907/** Host CPU general purpose register names. */
3908DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3909{
3910#ifdef RT_ARCH_AMD64
3911 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3912#elif defined(RT_ARCH_ARM64)
3913 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3914 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3915#else
3916# error "port me"
3917#endif
3918};
3919
3920
3921#if 0 /* unused */
3922/**
3923 * Tries to locate a suitable register in the given register mask.
3924 *
3925 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3926 * failed.
3927 *
3928 * @returns Host register number on success, returns UINT8_MAX on failure.
3929 */
3930static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3931{
3932 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3933 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3934 if (fRegs)
3935 {
3936 /** @todo pick better here: */
3937 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3938
3939 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3940 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3941 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3942 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3943
3944 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3945 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3946 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3947 return idxReg;
3948 }
3949 return UINT8_MAX;
3950}
3951#endif /* unused */
3952
3953
3954/**
3955 * Locate a register, possibly freeing one up.
3956 *
3957 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3958 * failed.
3959 *
3960 * @returns Host register number on success. Returns UINT8_MAX if no registers
3961 * are found; the caller is supposed to deal with this and raise an
3962 * allocation type specific status code (if desired).
3963 *
3964 * @throws VBox status code if we run into trouble spilling a variable or
3965 * recording debug info. Does NOT throw anything if we're out of
3966 * registers, though.
3967 */
3968static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3969 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3970{
3971 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3972 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3973 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3974
3975 /*
3976 * Try a freed register that's shadowing a guest register.
3977 */
3978 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3979 if (fRegs)
3980 {
3981 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3982
3983#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3984 /*
3985 * When we have liveness information, we use it to kick out all shadowed
3986 * guest registers that will not be needed any more in this TB. If we're
3987 * lucky, this may prevent us from ending up here again.
3988 *
3989 * Note! We must consider the previous entry here so we don't free
3990 * anything that the current threaded function requires (current
3991 * entry is produced by the next threaded function).
3992 */
3993 uint32_t const idxCurCall = pReNative->idxCurCall;
3994 if (idxCurCall > 0)
3995 {
3996 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3997
3998# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3999 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4000 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4001 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL */
4002# else
4003 /* Construct a mask of the registers not in the read or write state.
4004 Note! We could skip writes, if they aren't from us, as this is just
4005 a hack to prevent trashing registers that have just been written
4006 or will be written when we retire the current instruction. */
4007 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4008 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4009 & IEMLIVENESSBIT_MASK;
4010# endif
4011 /* Merge EFLAGS. */
4012 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
4013 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
4014 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
4015 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
4016 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
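            /* Net effect of the three fold steps above: the seven consecutive EFLAGS
               liveness bits (the aggregate "other" bit plus the six individual flag
               groups) are ANDed down into the single kIemNativeGstReg_EFlags position,
               so EFLAGS only ends up in the free mask when every sub-flag is unused. */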
4017
4018 /* If it matches any shadowed registers. */
4019 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4020 {
4021 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
4022 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4023 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4024
4025 /* See if we've got any unshadowed registers we can return now. */
4026 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4027 if (fUnshadowedRegs)
4028 {
4029 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4030 return (fPreferVolatile
4031 ? ASMBitFirstSetU32(fUnshadowedRegs)
4032 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4033 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4034 - 1;
4035 }
4036 }
4037 }
4038#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4039
4040 unsigned const idxReg = (fPreferVolatile
4041 ? ASMBitFirstSetU32(fRegs)
4042 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4043 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4044 - 1;
4045
4046 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4047 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4048 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4049 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4050
4051 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4052 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4053 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4054 return idxReg;
4055 }
4056
4057 /*
4058 * Try free up a variable that's in a register.
4059 *
4060 * We do two rounds here, first evacuating variables we don't need to be
4061 * saved on the stack, then in the second round move things to the stack.
4062 */
4063 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4064 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4065 {
4066 uint32_t fVars = pReNative->Core.bmVars;
4067 while (fVars)
4068 {
4069 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4070 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4071 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4072 && (RT_BIT_32(idxReg) & fRegMask)
4073 && ( iLoop == 0
4074 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4075 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4076 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4077 {
4078 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4079 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4080 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4081 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4082 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4083 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4084
4085 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4086 {
4087 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4088 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4089 }
4090
4091 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4092 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4093
4094 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4095 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4096 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4097 return idxReg;
4098 }
4099 fVars &= ~RT_BIT_32(idxVar);
4100 }
4101 }
4102
4103 return UINT8_MAX;
4104}
4105
4106
4107/**
4108 * Reassigns a variable to a different register specified by the caller.
4109 *
4110 * @returns The new code buffer position.
4111 * @param pReNative The native recompile state.
4112 * @param off The current code buffer position.
4113 * @param idxVar The variable index.
4114 * @param idxRegOld The old host register number.
4115 * @param idxRegNew The new host register number.
4116 * @param pszCaller The caller for logging.
4117 */
4118static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4119 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4120{
4121 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4122 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4123#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4124 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4125#endif
4126 RT_NOREF(pszCaller);
4127
4128 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4129
4130 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4131 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4132 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4133 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4134
4135 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4136 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4137 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4138 if (fGstRegShadows)
4139 {
4140 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4141 | RT_BIT_32(idxRegNew);
4142 while (fGstRegShadows)
4143 {
4144 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4145 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4146
4147 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4148 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4149 }
4150 }
4151
4152 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4153 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4154 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4155 return off;
4156}
4157
4158
4159/**
4160 * Moves a variable to a different register or spills it onto the stack.
4161 *
4162 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4163 * kinds can easily be recreated if needed later.
4164 *
4165 * @returns The new code buffer position.
4166 * @param pReNative The native recompile state.
4167 * @param off The current code buffer position.
4168 * @param idxVar The variable index.
4169 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4170 * call-volatile registers.
4171 */
4172DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4173 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4174{
4175 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4176 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4177 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4178 Assert(!pVar->fRegAcquired);
4179
4180 uint8_t const idxRegOld = pVar->idxReg;
4181 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4182 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4183 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4184 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4185 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4186 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4187 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4188 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4189
4190
4191 /** @todo Add statistics on this.*/
4192 /** @todo Implement basic variable liveness analysis (python) so variables
4193 * can be freed immediately once no longer used. Without it we risk
4194 * trashing registers and stack for dead variables.
4195 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4196
4197 /*
4198 * First try move it to a different register, as that's cheaper.
4199 */
4200 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4201 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4202 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4203 if (fRegs)
4204 {
4205 /* Avoid using shadow registers, if possible. */
4206 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4207 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4208 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4209 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4210 }
4211
4212 /*
4213 * Otherwise we must spill the register onto the stack.
4214 */
4215 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4216 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4217 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4218 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4219
4220 pVar->idxReg = UINT8_MAX;
4221 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4222 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4223 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4224 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4225 return off;
4226}
4227
4228
4229/**
4230 * Allocates a temporary host general purpose register.
4231 *
4232 * This may emit code to save register content onto the stack in order to free
4233 * up a register.
4234 *
4235 * @returns The host register number; throws VBox status code on failure,
4236 * so no need to check the return value.
4237 * @param pReNative The native recompile state.
4238 * @param poff Pointer to the variable with the code buffer position.
4239 * This will be updated if we need to move a variable from
4240 * register to stack in order to satisfy the request.
4241 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4242 * registers (@c true, default) or the other way around
4243 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4244 */
4245DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4246{
4247 /*
4248 * Try find a completely unused register, preferably a call-volatile one.
4249 */
4250 uint8_t idxReg;
4251 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4252 & ~pReNative->Core.bmHstRegsWithGstShadow
4253 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4254 if (fRegs)
4255 {
4256 if (fPreferVolatile)
4257 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4258 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4259 else
4260 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4261 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4262 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4263 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4264 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4265 }
4266 else
4267 {
4268 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4269 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4270 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4271 }
4272 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4273}
4274
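#if 0
/* Illustrative sketch (not built): typical temporary register usage - allocate a scratch
   register, emit whatever needs it, then hand it back.  iemNativeRegFreeTmp() is assumed
   to be the matching release function defined elsewhere in this file. */
static uint32_t iemNativeExampleUseTmpReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));
    /* ... further emits using idxTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif
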
4275
4276/**
4277 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4278 * registers.
4279 *
4280 * @returns The host register number; throws VBox status code on failure,
4281 * so no need to check the return value.
4282 * @param pReNative The native recompile state.
4283 * @param poff Pointer to the variable with the code buffer position.
4284 * This will be updated if we need to move a variable from
4285 * register to stack in order to satisfy the request.
4286 * @param fRegMask Mask of acceptable registers.
4287 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4288 * registers (@c true, default) or the other way around
4289 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4290 */
4291DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4292 bool fPreferVolatile /*= true*/)
4293{
4294 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4295 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4296
4297 /*
4298 * Try find a completely unused register, preferably a call-volatile one.
4299 */
4300 uint8_t idxReg;
4301 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4302 & ~pReNative->Core.bmHstRegsWithGstShadow
4303 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4304 & fRegMask;
4305 if (fRegs)
4306 {
4307 if (fPreferVolatile)
4308 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4309 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4310 else
4311 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4312 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4313 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4314 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4315 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4316 }
4317 else
4318 {
4319 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4320 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4321 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4322 }
4323 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4324}
4325
4326
4327/**
4328 * Allocates a temporary register for loading an immediate value into.
4329 *
4330 * This will emit code to load the immediate, unless there happens to be an
4331 * unused register with the value already loaded.
4332 *
4333 * The caller will not modify the returned register, it must be considered
4334 * read-only. Free using iemNativeRegFreeTmpImm.
4335 *
4336 * @returns The host register number; throws VBox status code on failure, so no
4337 * need to check the return value.
4338 * @param pReNative The native recompile state.
4339 * @param poff Pointer to the variable with the code buffer position.
4340 * @param uImm The immediate value that the register must hold upon
4341 * return.
4342 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4343 * registers (@c true, default) or the other way around
4344 * (@c false).
4345 *
4346 * @note Reusing immediate values has not been implemented yet.
4347 */
4348DECL_HIDDEN_THROW(uint8_t)
4349iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4350{
4351 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4352 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4353 return idxReg;
4354}
4355
4356
4357/**
4358 * Allocates a temporary host general purpose register for keeping a guest
4359 * register value.
4360 *
4361 * Since we may already have a register holding the guest register value,
4362 * code will be emitted to do the loading if that's not the case. Code may also
4363 * be emitted if we have to free up a register to satify the request.
4364 *
4365 * @returns The host register number; throws VBox status code on failure, so no
4366 * need to check the return value.
4367 * @param pReNative The native recompile state.
4368 * @param poff Pointer to the variable with the code buffer
4369 * position. This will be updated if we need to move a
4370 * variable from register to stack in order to satisfy
4371 * the request.
4372 * @param enmGstReg The guest register that is to be updated.
4373 * @param enmIntendedUse How the caller will be using the host register.
4374 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4375 * register is okay (default). The ASSUMPTION here is
4376 * that the caller has already flushed all volatile
4377 * registers, so this is only applied if we allocate a
4378 * new register.
4379 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4380 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4381 */
4382DECL_HIDDEN_THROW(uint8_t)
4383iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4384 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4385 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4386{
4387 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4388#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4389 AssertMsg( fSkipLivenessAssert
4390 || pReNative->idxCurCall == 0
4391 || enmGstReg == kIemNativeGstReg_Pc
4392 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4393 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4394 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4395 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4396 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4397 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4398#endif
4399 RT_NOREF(fSkipLivenessAssert);
4400#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4401 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4402#endif
4403 uint32_t const fRegMask = !fNoVolatileRegs
4404 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4405 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4406
4407 /*
4408 * First check if the guest register value is already in a host register.
4409 */
4410 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4411 {
4412 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4413 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4414 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4415 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4416
4417 /* It's not supposed to be allocated... */
4418 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4419 {
4420 /*
4421 * If the register will trash the guest shadow copy, try find a
4422 * completely unused register we can use instead. If that fails,
4423 * we need to disassociate the host reg from the guest reg.
4424 */
4425 /** @todo would be nice to know if preserving the register is in any way helpful. */
4426 /* If the purpose is calculations, try to duplicate the register value as
4427 we'll be clobbering the shadow. */
4428 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4429 && ( ~pReNative->Core.bmHstRegs
4430 & ~pReNative->Core.bmHstRegsWithGstShadow
4431 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4432 {
4433 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4434
4435 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4436
4437 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4438 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4439 g_apszIemNativeHstRegNames[idxRegNew]));
4440 idxReg = idxRegNew;
4441 }
4442 /* If the current register matches the restrictions, go ahead and allocate
4443 it for the caller. */
4444 else if (fRegMask & RT_BIT_32(idxReg))
4445 {
4446 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4447 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4448 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4449 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4450 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4451 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4452 else
4453 {
4454 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4455 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4456 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4457 }
4458 }
4459 /* Otherwise, allocate a register that satisfies the caller and transfer
4460 the shadowing if compatible with the intended use. (This basically
4461               means the caller wants a non-volatile register (RSP push/pop scenario).) */
4462 else
4463 {
4464 Assert(fNoVolatileRegs);
4465 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4466 !fNoVolatileRegs
4467 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4468 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4469 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4470 {
4471 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4472                    Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4473 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4474 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4475 }
4476 else
4477 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4478 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4479 g_apszIemNativeHstRegNames[idxRegNew]));
4480 idxReg = idxRegNew;
4481 }
4482 }
4483 else
4484 {
4485 /*
4486 * Oops. Shadowed guest register already allocated!
4487 *
4488 * Allocate a new register, copy the value and, if updating, the
4489 * guest shadow copy assignment to the new register.
4490 */
4491 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4492 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4493 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4494 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4495
4496 /** @todo share register for readonly access. */
4497 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4498 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4499
4500 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4501 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4502
4503 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4504 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4505 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4506 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4507 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4508 else
4509 {
4510 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4511 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4512 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4513 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4514 }
4515 idxReg = idxRegNew;
4516 }
4517 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4518
4519#ifdef VBOX_STRICT
4520 /* Strict builds: Check that the value is correct. */
4521 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4522#endif
4523
4524 return idxReg;
4525 }
4526
4527 /*
4528     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4529 */
4530 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4531
4532 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4533 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4534
4535 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4536 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4537 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4538 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4539
4540 return idxRegNew;
4541}
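
/*
 * Illustrative usage sketch (hypothetical caller, not taken from an actual emitter;
 * 'pReNative' and 'off' are assumed to be in scope as in the MC block emitters):
 *
 *   uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
 *                                                            kIemNativeGstRegUse_ForUpdate);
 *   // ... emit code that modifies idxPcReg; the shadow association is kept so the
 *   //     updated value can be written back to the guest context later ...
 *   iemNativeRegFreeTmp(pReNative, idxPcReg);
 */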
4542
4543
4544/**
4545 * Allocates a temporary host general purpose register that already holds the
4546 * given guest register value.
4547 *
4548 * The use case for this function is places where the shadowing state cannot be
4549 * modified due to branching and such. This will fail if we don't have a
4550 * current shadow copy handy or if it's incompatible. The only code that will
4551 * be emitted here is value checking code in strict builds.
4552 *
4553 * The intended use can only be readonly!
4554 *
4555 * @returns The host register number, UINT8_MAX if not present.
4556 * @param pReNative The native recompile state.
4557 * @param poff Pointer to the instruction buffer offset.
4558 * Will be updated in strict builds if a register is
4559 * found.
4560 * @param enmGstReg      The guest register that is to be fetched.
4561 * @note In strict builds, this may throw instruction buffer growth failures.
4562 * Non-strict builds will not throw anything.
4563 * @sa iemNativeRegAllocTmpForGuestReg
4564 */
4565DECL_HIDDEN_THROW(uint8_t)
4566iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4567{
4568 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4569#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4570 AssertMsg( pReNative->idxCurCall == 0
4571 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4572 || enmGstReg == kIemNativeGstReg_Pc,
4573 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4574#endif
4575
4576 /*
4577 * First check if the guest register value is already in a host register.
4578 */
4579 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4580 {
4581 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4582 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4583 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4584 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4585
4586 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4587 {
4588 /*
4589 * We only do readonly use here, so easy compared to the other
4590 * variant of this code.
4591 */
4592 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4593 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4594 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4595 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4596 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4597
4598#ifdef VBOX_STRICT
4599 /* Strict builds: Check that the value is correct. */
4600 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4601#else
4602 RT_NOREF(poff);
4603#endif
4604 return idxReg;
4605 }
4606 }
4607
4608 return UINT8_MAX;
4609}
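
/*
 * Illustrative sketch (hypothetical caller): use an existing shadow copy when one
 * is handy, and take a different path when there is none, so the shadowing state
 * is left untouched.
 *
 *   uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *   if (idxReg != UINT8_MAX)
 *   {
 *       // ... read-only use of the existing shadow copy ...
 *       iemNativeRegFreeTmp(pReNative, idxReg);
 *   }
 *   // else: fall back to a code path that does not need the value in a register.
 */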
4610
4611
4612/**
4613 * Allocates argument registers for a function call.
4614 *
4615 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4616 * need to check the return value.
4617 * @param pReNative The native recompile state.
4618 * @param off The current code buffer offset.
4619 * @param cArgs The number of arguments the function call takes.
4620 */
4621DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4622{
4623 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4624 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4625 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4626 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4627
4628 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4629 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4630 else if (cArgs == 0)
4631 return true;
4632
4633 /*
4634     * Do we get lucky and all the registers are free and not shadowing anything?
4635 */
4636 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4637 for (uint32_t i = 0; i < cArgs; i++)
4638 {
4639 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4640 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4641 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4642 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4643 }
4644 /*
4645     * Okay, not lucky, so we have to free up the registers.
4646 */
4647 else
4648 for (uint32_t i = 0; i < cArgs; i++)
4649 {
4650 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4651 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4652 {
4653 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4654 {
4655 case kIemNativeWhat_Var:
4656 {
4657 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4658 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4659 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4660 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4661 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4662#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4663 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4664#endif
4665
4666 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4667 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4668 else
4669 {
4670 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4671 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4672 }
4673 break;
4674 }
4675
4676 case kIemNativeWhat_Tmp:
4677 case kIemNativeWhat_Arg:
4678 case kIemNativeWhat_rc:
4679 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4680 default:
4681 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4682 }
4683
4684 }
4685 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4686 {
4687 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4688 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4689 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4690 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4691 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4692 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4693 }
4694 else
4695 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4696 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4697 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4698 }
4699 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4700 return true;
4701}
4702
4703
4704DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4705
4706
4707#if 0
4708/**
4709 * Frees a register assignment of any type.
4710 *
4711 * @param pReNative The native recompile state.
4712 * @param idxHstReg The register to free.
4713 *
4714 * @note Does not update variables.
4715 */
4716DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4717{
4718 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4719 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4720 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4721 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4722 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4723 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4724 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4725 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4726 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4727 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4728 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4729 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4730 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4731 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4732
4733 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4734 /* no flushing, right:
4735 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4736 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4737 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4738 */
4739}
4740#endif
4741
4742
4743/**
4744 * Frees a temporary register.
4745 *
4746 * Any shadow copies of guest registers assigned to the host register will not
4747 * be flushed by this operation.
4748 */
4749DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4750{
4751 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4752 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4753 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4754 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4755 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4756}
4757
4758
4759/**
4760 * Frees a temporary immediate register.
4761 *
4762 * It is assumed that the call has not modified the register, so it still holds
4763 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4764 */
4765DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4766{
4767 iemNativeRegFreeTmp(pReNative, idxHstReg);
4768}
4769
4770
4771/**
4772 * Frees a register assigned to a variable.
4773 *
4774 * The register will be disassociated from the variable.
4775 */
4776DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4777{
4778 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4779 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4780 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4781 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4782 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4783#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4784 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4785#endif
4786
4787 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4788 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4789 if (!fFlushShadows)
4790 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4791 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4792 else
4793 {
4794 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4795 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4796 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4797 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4798 uint64_t fGstRegShadows = fGstRegShadowsOld;
4799 while (fGstRegShadows)
4800 {
4801 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4802 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4803
4804 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4805 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4806 }
4807 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4808 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4809 }
4810}
4811
4812
4813#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4814# ifdef LOG_ENABLED
4815/** Host CPU SIMD register names. */
4816DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4817{
4818# ifdef RT_ARCH_AMD64
4819 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4820# elif defined(RT_ARCH_ARM64)
4821 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4822 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4823# else
4824# error "port me"
4825# endif
4826};
4827# endif
4828
4829
4830/**
4831 * Frees a SIMD register assigned to a variable.
4832 *
4833 * The register will be disassociated from the variable.
4834 */
4835DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4836{
4837 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4838 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4839 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4840 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4841 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4842 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4843
4844 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4845 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4846 if (!fFlushShadows)
4847 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4848 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4849 else
4850 {
4851 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4852 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4853 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4854 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4855 uint64_t fGstRegShadows = fGstRegShadowsOld;
4856 while (fGstRegShadows)
4857 {
4858 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4859 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4860
4861 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4862 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4863 }
4864 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4865 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4866 }
4867}
4868#endif
4869
4870
4871/**
4872 * Called right before emitting a call instruction to move anything important
4873 * out of call-volatile registers, free and flush the call-volatile registers,
4874 * optionally freeing argument variables.
4875 *
4876 * @returns New code buffer offset, UINT32_MAX on failure.
4877 * @param pReNative The native recompile state.
4878 * @param off The code buffer offset.
4879 * @param cArgs The number of arguments the function call takes.
4880 *                      It is presumed that the host register part of these has
4881 * been allocated as such already and won't need moving,
4882 * just freeing.
4883 * @param fKeepVars Mask of variables that should keep their register
4884 * assignments. Caller must take care to handle these.
4885 */
4886DECL_HIDDEN_THROW(uint32_t)
4887iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4888{
4889 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4890
4891 /* fKeepVars will reduce this mask. */
4892 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4893
4894 /*
4895 * Move anything important out of volatile registers.
4896 */
4897 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4898 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4899 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4900#ifdef IEMNATIVE_REG_FIXED_TMP0
4901 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4902#endif
4903#ifdef IEMNATIVE_REG_FIXED_TMP1
4904 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4905#endif
4906#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4907 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4908#endif
4909 & ~g_afIemNativeCallRegs[cArgs];
4910
4911 fRegsToMove &= pReNative->Core.bmHstRegs;
4912 if (!fRegsToMove)
4913 { /* likely */ }
4914 else
4915 {
4916 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4917 while (fRegsToMove != 0)
4918 {
4919 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4920 fRegsToMove &= ~RT_BIT_32(idxReg);
4921
4922 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4923 {
4924 case kIemNativeWhat_Var:
4925 {
4926 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4927 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4928 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4929 Assert(pVar->idxReg == idxReg);
4930 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4931 {
4932 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4933 idxVar, pVar->enmKind, pVar->idxReg));
4934 if (pVar->enmKind != kIemNativeVarKind_Stack)
4935 pVar->idxReg = UINT8_MAX;
4936 else
4937 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4938 }
4939 else
4940 fRegsToFree &= ~RT_BIT_32(idxReg);
4941 continue;
4942 }
4943
4944 case kIemNativeWhat_Arg:
4945 AssertMsgFailed(("What?!?: %u\n", idxReg));
4946 continue;
4947
4948 case kIemNativeWhat_rc:
4949 case kIemNativeWhat_Tmp:
4950 AssertMsgFailed(("Missing free: %u\n", idxReg));
4951 continue;
4952
4953 case kIemNativeWhat_FixedTmp:
4954 case kIemNativeWhat_pVCpuFixed:
4955 case kIemNativeWhat_pCtxFixed:
4956 case kIemNativeWhat_PcShadow:
4957 case kIemNativeWhat_FixedReserved:
4958 case kIemNativeWhat_Invalid:
4959 case kIemNativeWhat_End:
4960 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4961 }
4962 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4963 }
4964 }
4965
4966 /*
4967 * Do the actual freeing.
4968 */
4969 if (pReNative->Core.bmHstRegs & fRegsToFree)
4970 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4971 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4972 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4973
4974 /* If there are guest register shadows in any call-volatile register, we
4975       have to clear the corresponding guest register masks for each register. */
4976 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4977 if (fHstRegsWithGstShadow)
4978 {
4979 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4980 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4981 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4982 do
4983 {
4984 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4985 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4986
4987 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4988 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4989 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4990 } while (fHstRegsWithGstShadow != 0);
4991 }
4992
4993 return off;
4994}
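
/*
 * Illustrative sketch (hypothetical helper-call emitter): right before emitting a
 * call, get anything important out of the call-volatile registers and flush their
 * guest shadows.  With cArgs=0 nothing is treated as a pre-allocated argument.
 *
 *   off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0);  // no argument registers pre-allocated
 *   // ... load the argument registers and emit the actual call here ...
 */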
4995
4996
4997/**
4998 * Flushes a set of guest register shadow copies.
4999 *
5000 * This is usually done after calling a threaded function or a C-implementation
5001 * of an instruction.
5002 *
5003 * @param pReNative The native recompile state.
5004 * @param fGstRegs Set of guest registers to flush.
5005 */
5006DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
5007{
5008 /*
5009 * Reduce the mask by what's currently shadowed
5010 */
5011 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
5012 fGstRegs &= bmGstRegShadowsOld;
5013 if (fGstRegs)
5014 {
5015 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
5016 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
5017 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
5018 if (bmGstRegShadowsNew)
5019 {
5020 /*
5021 * Partial.
5022 */
5023 do
5024 {
5025 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5026 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5027 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5028 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5029 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5030
5031 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5032 fGstRegs &= ~fInThisHstReg;
5033 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5034 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5035 if (!fGstRegShadowsNew)
5036 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5037 } while (fGstRegs != 0);
5038 }
5039 else
5040 {
5041 /*
5042 * Clear all.
5043 */
5044 do
5045 {
5046 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5047 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5048 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5049 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5050 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5051
5052 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5053 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5054 } while (fGstRegs != 0);
5055 pReNative->Core.bmHstRegsWithGstShadow = 0;
5056 }
5057 }
5058}
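
/*
 * Illustrative sketch: after a call that may have modified the guest context
 * behind our back, drop the now stale shadow copies (no code is emitted).
 *
 *   iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);                      // drop all shadows
 *   iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));  // or just one register
 */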
5059
5060
5061/**
5062 * Flushes guest register shadow copies held by a set of host registers.
5063 *
5064 * This is used with the TLB lookup code for ensuring that we don't carry on
5065 * with any guest shadows in volatile registers, as these will get corrupted by
5066 * a TLB miss.
5067 *
5068 * @param pReNative The native recompile state.
5069 * @param fHstRegs Set of host registers to flush guest shadows for.
5070 */
5071DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5072{
5073 /*
5074 * Reduce the mask by what's currently shadowed.
5075 */
5076 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5077 fHstRegs &= bmHstRegsWithGstShadowOld;
5078 if (fHstRegs)
5079 {
5080 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5081 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5082 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5083 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5084 if (bmHstRegsWithGstShadowNew)
5085 {
5086 /*
5087 * Partial (likely).
5088 */
5089 uint64_t fGstShadows = 0;
5090 do
5091 {
5092 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5093 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5094 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5095 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5096
5097 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5098 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5099 fHstRegs &= ~RT_BIT_32(idxHstReg);
5100 } while (fHstRegs != 0);
5101 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5102 }
5103 else
5104 {
5105 /*
5106 * Clear all.
5107 */
5108 do
5109 {
5110 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5111 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5112 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5113 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5114
5115 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5116 fHstRegs &= ~RT_BIT_32(idxHstReg);
5117 } while (fHstRegs != 0);
5118 pReNative->Core.bmGstRegShadows = 0;
5119 }
5120 }
5121}
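
/*
 * Illustrative sketch: before a TLB lookup sequence, make sure no guest shadows
 * live in call-volatile host registers, since a TLB miss helper call would leave
 * them stale.
 *
 *   iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */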
5122
5123
5124/**
5125 * Restores guest shadow copies in volatile registers.
5126 *
5127 * This is used after calling a helper function (think TLB miss) to restore the
5128 * register state of volatile registers.
5129 *
5130 * @param pReNative The native recompile state.
5131 * @param off The code buffer offset.
5132 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5133 * be active (allocated) w/o asserting. Hack.
5134 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5135 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5136 */
5137DECL_HIDDEN_THROW(uint32_t)
5138iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5139{
5140 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5141 if (fHstRegs)
5142 {
5143 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5144 do
5145 {
5146 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5147
5148            /* It's not fatal if a register is active holding a variable that is
5149               shadowing a guest register, ASSUMING all pending guest register
5150               writes were flushed prior to the helper call. However, we'll be
5151               emitting duplicate restores, so it wastes code space. */
5152 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5153 RT_NOREF(fHstRegsActiveShadows);
5154
5155 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5156 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5157 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5158 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5159
5160 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5161 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5162
5163 fHstRegs &= ~RT_BIT_32(idxHstReg);
5164 } while (fHstRegs != 0);
5165 }
5166 return off;
5167}
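
/*
 * Illustrative sketch (hypothetical TLB-miss slow path): after the helper call
 * returns, reload the guest values that were shadowed in volatile registers.
 * Passing 0 means no volatile register is expected to be actively allocated.
 *
 *   // ... helper call emitted here, with pending guest writes flushed before it ...
 *   off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 */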
5168
5169
5170
5171
5172/*********************************************************************************************************************************
5173* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5174*********************************************************************************************************************************/
5175#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5176
5177/**
5178 * Info about shadowed guest SIMD register values.
5179 * @see IEMNATIVEGSTSIMDREG
5180 */
5181static struct
5182{
5183 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5184 uint32_t offXmm;
5185 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5186 uint32_t offYmm;
5187 /** Name (for logging). */
5188 const char *pszName;
5189} const g_aGstSimdShadowInfo[] =
5190{
5191#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5192 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5193 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5194 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5195 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5196 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5197 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5198 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5199 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5200 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5201 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5202 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5203 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5204 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5205 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5206 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5207 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5208 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5209#undef CPUMCTX_OFF_AND_SIZE
5210};
5211AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5212
5213
5214/**
5215 * Frees a temporary SIMD register.
5216 *
5217 * Any shadow copies of guest registers assigned to the host register will not
5218 * be flushed by this operation.
5219 */
5220DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5221{
5222 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5223 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5224 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5225 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5226 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5227}
5228
5229
5230/**
5231 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5232 *
5233 * @returns New code buffer offset.
5234 * @param pReNative The native recompile state.
5235 * @param off Current code buffer position.
5236 * @param enmGstSimdReg The guest SIMD register to flush.
5237 */
5238DECL_HIDDEN_THROW(uint32_t)
5239iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5240{
5241 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5242
5243 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5244 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5245 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5246 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5247
5248 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5249 {
5250 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5251 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5252 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5253 }
5254
5255 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5256 {
5257 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5258 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5259 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5260 }
5261
5262 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5263 return off;
5264}
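
/*
 * Illustrative sketch: write any dirty halves of guest ymm0 back to CPUMCTX before
 * emitting code that accesses the value in the guest context directly.
 *
 *   off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0));
 */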
5265
5266
5267/**
5268 * Locate a register, possibly freeing one up.
5269 *
5270 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5271 * failed.
5272 *
5273 * @returns Host register number on success. Returns UINT8_MAX if no registers
5274 *          found, the caller is supposed to deal with this and raise an
5275 *          allocation type specific status code (if desired).
5276 *
5277 * @throws  VBox status code if we run into trouble spilling a variable or
5278 *          recording debug info. Does NOT throw anything if we're out of
5279 * registers, though.
5280 */
5281static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5282 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5283{
5284 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5285 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5286 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5287
5288 /*
5289 * Try a freed register that's shadowing a guest register.
5290 */
5291 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5292 if (fRegs)
5293 {
5294 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5295
5296#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5297 /*
5298         * When we have liveness information, we use it to kick out all shadowed
5299         * guest registers that will not be needed any more in this TB. If we're
5300 * lucky, this may prevent us from ending up here again.
5301 *
5302 * Note! We must consider the previous entry here so we don't free
5303 * anything that the current threaded function requires (current
5304 * entry is produced by the next threaded function).
5305 */
5306 uint32_t const idxCurCall = pReNative->idxCurCall;
5307 if (idxCurCall > 0)
5308 {
5309 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5310
5311# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5312 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5313 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5314 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5315#else
5316 /* Construct a mask of the registers not in the read or write state.
5317           Note! We could skip writes, if they aren't from us, as this is just
5318 a hack to prevent trashing registers that have just been written
5319 or will be written when we retire the current instruction. */
5320 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5321 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5322 & IEMLIVENESSBIT_MASK;
5323#endif
5324 /* If it matches any shadowed registers. */
5325 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5326 {
5327 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5328 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5329 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5330
5331 /* See if we've got any unshadowed registers we can return now. */
5332 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5333 if (fUnshadowedRegs)
5334 {
5335 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5336 return (fPreferVolatile
5337 ? ASMBitFirstSetU32(fUnshadowedRegs)
5338 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5339 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5340 - 1;
5341 }
5342 }
5343 }
5344#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5345
5346 unsigned const idxReg = (fPreferVolatile
5347 ? ASMBitFirstSetU32(fRegs)
5348 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5349 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5350 - 1;
5351
5352 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5353 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5354 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5355 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5356
5357 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5358 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5359 uint32_t idxGstSimdReg = 0;
5360 do
5361 {
5362 if (fGstRegShadows & 0x1)
5363 {
5364 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5365 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5366 }
5367 idxGstSimdReg++;
5368 fGstRegShadows >>= 1;
5369 } while (fGstRegShadows);
5370
5371 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5372 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5373 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5374 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5375 return idxReg;
5376 }
5377
5378 /*
5379 * Try free up a variable that's in a register.
5380 *
5381     * We do two rounds here, first evacuating variables that don't need to be
5382     * saved on the stack, then in the second round moving things to the stack.
5383 */
5384 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5385 AssertReleaseFailed(); /** @todo No variable support right now. */
5386#if 0
5387 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5388 {
5389 uint32_t fVars = pReNative->Core.bmSimdVars;
5390 while (fVars)
5391 {
5392 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5393 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5394 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5395 && (RT_BIT_32(idxReg) & fRegMask)
5396 && ( iLoop == 0
5397 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5398 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5399 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5400 {
5401 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5402 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5403 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5404 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5405 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5406 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5407
5408 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5409 {
5410 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5411 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5412 }
5413
5414 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5415 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5416
5417 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5418 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5419 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5420 return idxReg;
5421 }
5422 fVars &= ~RT_BIT_32(idxVar);
5423 }
5424 }
5425#endif
5426
5427 AssertFailed();
5428 return UINT8_MAX;
5429}
5430
5431
5432/**
5433 * Flushes a set of guest register shadow copies.
5434 *
5435 * This is usually done after calling a threaded function or a C-implementation
5436 * of an instruction.
5437 *
5438 * @param pReNative The native recompile state.
5439 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5440 */
5441DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5442{
5443 /*
5444 * Reduce the mask by what's currently shadowed
5445 */
5446 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5447 fGstSimdRegs &= bmGstSimdRegShadows;
5448 if (fGstSimdRegs)
5449 {
5450 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5451 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5452 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5453 if (bmGstSimdRegShadowsNew)
5454 {
5455 /*
5456 * Partial.
5457 */
5458 do
5459 {
5460 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5461 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5462 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5463 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5464 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5465 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5466
5467 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5468 fGstSimdRegs &= ~fInThisHstReg;
5469 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5470 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5471 if (!fGstRegShadowsNew)
5472 {
5473 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5474 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5475 }
5476 } while (fGstSimdRegs != 0);
5477 }
5478 else
5479 {
5480 /*
5481 * Clear all.
5482 */
5483 do
5484 {
5485 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5486 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5487 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5488 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5489 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5490 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5491
5492 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5493 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5494 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5495 } while (fGstSimdRegs != 0);
5496 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5497 }
5498 }
5499}
5500
5501
5502/**
5503 * Allocates a temporary host SIMD register.
5504 *
5505 * This may emit code to save register content onto the stack in order to free
5506 * up a register.
5507 *
5508 * @returns The host register number; throws VBox status code on failure,
5509 * so no need to check the return value.
5510 * @param pReNative The native recompile state.
5511 * @param poff Pointer to the variable with the code buffer position.
5512 *                          This will be updated if we need to move a variable from
5513 * register to stack in order to satisfy the request.
5514 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5515 * registers (@c true, default) or the other way around
5516 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5517 */
5518DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5519{
5520 /*
5521 * Try find a completely unused register, preferably a call-volatile one.
5522 */
5523 uint8_t idxSimdReg;
5524    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5525                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5526 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5527 if (fRegs)
5528 {
5529 if (fPreferVolatile)
5530 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5531 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5532 else
5533 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5534 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5535 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5536 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5537 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5538 }
5539 else
5540 {
5541 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5542 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5543 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5544 }
5545
5546 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5547 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5548}
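
/*
 * Illustrative sketch: grab a scratch SIMD register, use it, then release it.
 *
 *   uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *   // ... emit SIMD code using idxSimdTmp ...
 *   iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 */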
5549
5550
5551/**
5552 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5553 * registers.
5554 *
5555 * @returns The host register number; throws VBox status code on failure,
5556 * so no need to check the return value.
5557 * @param pReNative The native recompile state.
5558 * @param poff Pointer to the variable with the code buffer position.
5559 * This will be update if we need to move a variable from
5560 * register to stack in order to satisfy the request.
5561 * @param fRegMask Mask of acceptable registers.
5562 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5563 * registers (@c true, default) or the other way around
5564 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5565 */
5566DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5567 bool fPreferVolatile /*= true*/)
5568{
5569 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5570 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5571
5572 /*
5573 * Try find a completely unused register, preferably a call-volatile one.
5574 */
5575 uint8_t idxSimdReg;
5576 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5577 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5578 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5579 & fRegMask;
5580 if (fRegs)
5581 {
5582 if (fPreferVolatile)
5583 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5584 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5585 else
5586 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5587 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5588 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5589 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5590 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5591 }
5592 else
5593 {
5594 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5595 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5596 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5597 }
5598
5599 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5600 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5601}
5602
5603
5604/**
5605 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5606 *
5607 * @param pReNative The native recompile state.
5608 * @param idxHstSimdReg The host SIMD register to update the state for.
5609 * @param enmLoadSz The load size to set.
5610 */
5611DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5612 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5613{
5614 /* Everything valid already? -> nothing to do. */
5615 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5616 return;
5617
5618 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5619 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5620 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5621 {
5622 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5623 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5624 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5625 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5626 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5627 }
5628}
5629
5630
5631static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5632 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5633{
5634 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5635 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5636 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5637 {
5638# ifdef RT_ARCH_ARM64
5639 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5640 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5641# endif
5642
5643 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5644 {
5645 switch (enmLoadSzDst)
5646 {
5647 case kIemNativeGstSimdRegLdStSz_256:
5648 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5649 break;
5650 case kIemNativeGstSimdRegLdStSz_Low128:
5651 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5652 break;
5653 case kIemNativeGstSimdRegLdStSz_High128:
5654 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5655 break;
5656 default:
5657 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5658 }
5659
5660 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5661 }
5662 }
5663 else
5664 {
5665 /* Complicated stuff where the source is currently missing something, later. */
5666 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5667 }
5668
5669 return off;
5670}
5671
5672
5673/**
5674 * Allocates a temporary host SIMD register for keeping a guest
5675 * SIMD register value.
5676 *
5677 * Since we may already have a register holding the guest register value,
5678 * code will be emitted to do the loading if that's not the case. Code may also
5679 * be emitted if we have to free up a register to satisfy the request.
5680 *
5681 * @returns The host register number; throws VBox status code on failure, so no
5682 * need to check the return value.
5683 * @param pReNative The native recompile state.
5684 * @param poff Pointer to the variable with the code buffer
5685 *                          position. This will be updated if we need to move a
5686 * variable from register to stack in order to satisfy
5687 * the request.
5688 * @param enmGstSimdReg    The guest SIMD register that is to be updated.
5689 * @param enmIntendedUse How the caller will be using the host register.
5690 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5691 * register is okay (default). The ASSUMPTION here is
5692 * that the caller has already flushed all volatile
5693 * registers, so this is only applied if we allocate a
5694 * new register.
5695 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5696 */
5697DECL_HIDDEN_THROW(uint8_t)
5698iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5699 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5700 bool fNoVolatileRegs /*= false*/)
5701{
5702 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5703#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5704 AssertMsg( pReNative->idxCurCall == 0
5705 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5706 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5707 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5708 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5709 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5710 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5711#endif
5712#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5713 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5714#endif
5715 uint32_t const fRegMask = !fNoVolatileRegs
5716 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5717 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5718
5719 /*
5720 * First check if the guest register value is already in a host register.
5721 */
5722 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5723 {
5724 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5725 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5726 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5727 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5728
5729 /* It's not supposed to be allocated... */
5730 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5731 {
5732 /*
5733             * If the register will trash the guest shadow copy, try to find a
5734 * completely unused register we can use instead. If that fails,
5735 * we need to disassociate the host reg from the guest reg.
5736 */
5737 /** @todo would be nice to know if preserving the register is in any way helpful. */
5738            /* If the purpose is calculations, try to duplicate the register value as
5739 we'll be clobbering the shadow. */
5740 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5741 && ( ~pReNative->Core.bmHstSimdRegs
5742 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5743 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5744 {
5745 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5746
5747 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5748
5749 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5750 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5751 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5752 idxSimdReg = idxRegNew;
5753 }
5754 /* If the current register matches the restrictions, go ahead and allocate
5755 it for the caller. */
5756 else if (fRegMask & RT_BIT_32(idxSimdReg))
5757 {
5758 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5759 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5760 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5761 {
5762 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5763 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxSimdReg, idxSimdReg, enmLoadSz);
5764 else
5765 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5766 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5767 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5768 }
5769 else
5770 {
5771 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5772 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5773 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5774 }
5775 }
5776 /* Otherwise, allocate a register that satisfies the caller and transfer
5777 the shadowing if compatible with the intended use. (This basically
5778               means the caller wants a non-volatile register (RSP push/pop scenario).) */
5779 else
5780 {
5781 Assert(fNoVolatileRegs);
5782 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5783 !fNoVolatileRegs
5784 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5785 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5786 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5787 {
5788 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5789                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5790 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5791 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5792 }
5793 else
5794 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5795 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5796 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5797 idxSimdReg = idxRegNew;
5798 }
5799 }
5800 else
5801 {
5802 /*
5803 * Oops. Shadowed guest register already allocated!
5804 *
5805 * Allocate a new register, copy the value and, if updating, the
5806 * guest shadow copy assignment to the new register.
5807 */
5808 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5809 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5810 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5811 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5812
5813 /** @todo share register for readonly access. */
5814 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5815 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5816
5817 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5818 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5819 else
5820 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5821
5822 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5823 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5824 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5825 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5826 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5827 else
5828 {
5829 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5830 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5831 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5832 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5833 }
5834 idxSimdReg = idxRegNew;
5835 }
5836 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5837
5838#ifdef VBOX_STRICT
5839 /* Strict builds: Check that the value is correct. */
5840 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5841 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5842#endif
5843
5844 return idxSimdReg;
5845 }
5846
5847 /*
5848     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5849 */
5850 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5851
5852 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5853 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5854 else
5855 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5856
5857 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5858 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5859
5860    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5861 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5862
5863 return idxRegNew;
5864}
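

/*
 * Illustrative usage sketch for the allocator above (not compiled).  The
 * emitter function it lives in and the iemNativeSimdRegFreeTmp() call are
 * assumptions made for the sake of the example, not references to the exact
 * helpers used by the generated emitters.
 */
# if 0
static uint32_t iemNativeEmitExampleReadXmm0(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Get a host SIMD register shadowing guest XMM0, loading only the low 128 bits for read-only use. */
    uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(0),
                                                                          kIemNativeGstSimdRegLdStSz_Low128,
                                                                          kIemNativeGstRegUse_ReadOnly);
    /* ... emit whatever instructions need idxHstSimdReg here ... */

    /* Release the temporary register again (free helper name assumed). */
    iemNativeSimdRegFreeTmp(pReNative, idxHstSimdReg);
    return off;
}
# endif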
5865
5866#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5867
5868
5869
5870/*********************************************************************************************************************************
5871* Code emitters for flushing pending guest register writes and sanity checks *
5872*********************************************************************************************************************************/
5873
5874#ifdef VBOX_STRICT
5875/**
5876 * Does internal register allocator sanity checks.
5877 */
5878DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5879{
5880 /*
5881 * Iterate host registers building a guest shadowing set.
5882 */
5883 uint64_t bmGstRegShadows = 0;
5884 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5885 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5886 while (bmHstRegsWithGstShadow)
5887 {
5888 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5889 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5890 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5891
5892 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5893 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5894 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5895 bmGstRegShadows |= fThisGstRegShadows;
5896 while (fThisGstRegShadows)
5897 {
5898 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5899 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5900 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5901 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5902 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5903 }
5904 }
5905 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5906 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5907 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5908
5909 /*
5910 * Now the other way around, checking the guest to host index array.
5911 */
5912 bmHstRegsWithGstShadow = 0;
5913 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5914 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5915 while (bmGstRegShadows)
5916 {
5917 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5918 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5919 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5920
5921 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5922 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5923 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5924 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5925 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5926 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5927 }
5928 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5929 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5930 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5931}
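
/*
 * Put differently, the invariant verified above is: for every host register H
 * shadowing guest register G, aidxGstRegShadows[G] == H, bit G is set in
 * bmGstRegShadows and bit H is set in bmHstRegsWithGstShadow - and no other
 * bits are set in either bitmap.
 */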
5932#endif /* VBOX_STRICT */
5933
5934
5935/**
5936 * Flushes any delayed guest register writes.
5937 *
5938 * This must be called prior to calling CImpl functions and any helpers that use
5939 * the guest state (like raising exceptions) and such.
5940 *
5941 * This optimization has not yet been implemented. The first target would be
5942 * RIP updates, since these are the most common ones.
5943 */
5944DECL_HIDDEN_THROW(uint32_t)
5945iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, bool fFlushShadows)
5946{
5947#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5948 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
5949 off = iemNativeEmitPcWriteback(pReNative, off);
5950#else
5951 RT_NOREF(pReNative, fGstShwExcept);
5952#endif
5953
5954#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5955 /** @todo r=bird: There must be a quicker way to check if anything needs
5956     *        doing and then call the SIMD function to do the flushing */
5957 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
5958 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
5959 {
5960 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
5961 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
5962
5963 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
5964 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5965
5966 if ( fFlushShadows
5967 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
5968 {
5969 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5970
5971 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
5972 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
5973 }
5974 }
5975#else
5976 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
5977#endif
5978
5979 return off;
5980}
5981
5982
5983#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5984/**
5985 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5986 */
5987DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5988{
5989 Assert(pReNative->Core.offPc);
5990# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5991 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5992 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5993# endif
5994
5995# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5996 /* Allocate a temporary PC register. */
5997 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5998
5999 /* Perform the addition and store the result. */
6000 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6001 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6002
6003 /* Free but don't flush the PC register. */
6004 iemNativeRegFreeTmp(pReNative, idxPcReg);
6005# else
6006 /* Compare the shadow with the context value, they should match. */
6007 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6008 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6009# endif
6010
6011 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
6012 pReNative->Core.offPc = 0;
6013 pReNative->Core.cInstrPcUpdateSkipped = 0;
6014
6015 return off;
6016}
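
/*
 * Worked example (illustrative): if three 2-byte instructions were recompiled
 * without emitting individual RIP updates, Core.offPc is 6 and
 * Core.cInstrPcUpdateSkipped is 3 when we get here; the code above then adds 6
 * to the cached RIP, stores it to CPUMCTX and resets both counters.
 */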
6017#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6018
6019
6020/*********************************************************************************************************************************
6021* Code Emitters (larger snippets) *
6022*********************************************************************************************************************************/
6023
6024/**
6025 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6026 * extending to 64-bit width.
6027 *
6028 * @returns New code buffer offset on success, UINT32_MAX on failure.
6029 * @param   pReNative   The native recompile state.
6030 * @param off The current code buffer position.
6031 * @param idxHstReg The host register to load the guest register value into.
6032 * @param enmGstReg The guest register to load.
6033 *
6034 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
6035 * that is something the caller needs to do if applicable.
6036 */
6037DECL_HIDDEN_THROW(uint32_t)
6038iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6039{
6040 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6041 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6042
6043 switch (g_aGstShadowInfo[enmGstReg].cb)
6044 {
6045 case sizeof(uint64_t):
6046 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6047 case sizeof(uint32_t):
6048 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6049 case sizeof(uint16_t):
6050 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6051#if 0 /* not present in the table. */
6052 case sizeof(uint8_t):
6053 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6054#endif
6055 default:
6056 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6057 }
6058}
6059
6060
6061#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6062/**
6063 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6064 *
6065 * @returns New code buffer offset on success, UINT32_MAX on failure.
6066 * @param pReNative The recompiler state.
6067 * @param off The current code buffer position.
6068 * @param idxHstSimdReg The host register to load the guest register value into.
6069 * @param enmGstSimdReg The guest register to load.
6070 * @param enmLoadSz The load size of the register.
6071 *
6072 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg;
6073 * that is something the caller needs to do if applicable.
6074 */
6075DECL_HIDDEN_THROW(uint32_t)
6076iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6077 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6078{
6079 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6080
6081 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6082 switch (enmLoadSz)
6083 {
6084 case kIemNativeGstSimdRegLdStSz_256:
6085 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6086 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6087 case kIemNativeGstSimdRegLdStSz_Low128:
6088 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6089 case kIemNativeGstSimdRegLdStSz_High128:
6090 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6091 default:
6092 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6093 }
6094}
6095#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6096
6097#ifdef VBOX_STRICT
6098
6099/**
6100 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6101 *
6102 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6103 * Trashes EFLAGS on AMD64.
6104 */
6105DECL_HIDDEN_THROW(uint32_t)
6106iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6107{
6108# ifdef RT_ARCH_AMD64
6109 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6110
6111 /* rol reg64, 32 */
6112 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6113 pbCodeBuf[off++] = 0xc1;
6114 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6115 pbCodeBuf[off++] = 32;
6116
6117 /* test reg32, ffffffffh */
6118 if (idxReg >= 8)
6119 pbCodeBuf[off++] = X86_OP_REX_B;
6120 pbCodeBuf[off++] = 0xf7;
6121 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6122 pbCodeBuf[off++] = 0xff;
6123 pbCodeBuf[off++] = 0xff;
6124 pbCodeBuf[off++] = 0xff;
6125 pbCodeBuf[off++] = 0xff;
6126
6127 /* je/jz +1 */
6128 pbCodeBuf[off++] = 0x74;
6129 pbCodeBuf[off++] = 0x01;
6130
6131 /* int3 */
6132 pbCodeBuf[off++] = 0xcc;
6133
6134 /* rol reg64, 32 */
6135 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6136 pbCodeBuf[off++] = 0xc1;
6137 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6138 pbCodeBuf[off++] = 32;
6139
6140# elif defined(RT_ARCH_ARM64)
6141 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6142 /* lsr tmp0, reg64, #32 */
6143 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6144 /* cbz tmp0, +1 */
6145 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6146 /* brk #0x1100 */
6147 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6148
6149# else
6150# error "Port me!"
6151# endif
6152 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6153 return off;
6154}
6155
6156
6157/**
6158 * Emitting code that checks that the content of register @a idxReg is the same
6159 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6160 * instruction if that's not the case.
6161 *
6162 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6163 * Trashes EFLAGS on AMD64.
6164 */
6165DECL_HIDDEN_THROW(uint32_t)
6166iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6167{
6168# ifdef RT_ARCH_AMD64
6169 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6170
6171 /* cmp reg, [mem] */
6172 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6173 {
6174 if (idxReg >= 8)
6175 pbCodeBuf[off++] = X86_OP_REX_R;
6176 pbCodeBuf[off++] = 0x38;
6177 }
6178 else
6179 {
6180 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6181 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6182 else
6183 {
6184 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6185 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6186 else
6187 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6188 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6189 if (idxReg >= 8)
6190 pbCodeBuf[off++] = X86_OP_REX_R;
6191 }
6192 pbCodeBuf[off++] = 0x39;
6193 }
6194 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6195
6196 /* je/jz +1 */
6197 pbCodeBuf[off++] = 0x74;
6198 pbCodeBuf[off++] = 0x01;
6199
6200 /* int3 */
6201 pbCodeBuf[off++] = 0xcc;
6202
6203 /* For values smaller than the register size, we must check that the rest
6204 of the register is all zeros. */
6205 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6206 {
6207 /* test reg64, imm32 */
6208 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6209 pbCodeBuf[off++] = 0xf7;
6210 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6211 pbCodeBuf[off++] = 0;
6212 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6213 pbCodeBuf[off++] = 0xff;
6214 pbCodeBuf[off++] = 0xff;
6215
6216 /* je/jz +1 */
6217 pbCodeBuf[off++] = 0x74;
6218 pbCodeBuf[off++] = 0x01;
6219
6220 /* int3 */
6221 pbCodeBuf[off++] = 0xcc;
6222 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6223 }
6224 else
6225 {
6226 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6227 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6228 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6229 }
6230
6231# elif defined(RT_ARCH_ARM64)
6232 /* mov TMP0, [gstreg] */
6233 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6234
6235 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6236 /* sub tmp0, tmp0, idxReg */
6237 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6238 /* cbz tmp0, +1 */
6239 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6240 /* brk #0x1000+enmGstReg */
6241 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6242 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6243
6244# else
6245# error "Port me!"
6246# endif
6247 return off;
6248}
6249
6250
6251# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6252# ifdef RT_ARCH_AMD64
6253/**
6254 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6255 */
6256DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6257{
6258 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6259 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6260 if (idxSimdReg >= 8)
6261 pbCodeBuf[off++] = X86_OP_REX_R;
6262 pbCodeBuf[off++] = 0x0f;
6263 pbCodeBuf[off++] = 0x38;
6264 pbCodeBuf[off++] = 0x29;
6265 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6266
6267 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6268 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6269 pbCodeBuf[off++] = X86_OP_REX_W
6270 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6271 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6272 pbCodeBuf[off++] = 0x0f;
6273 pbCodeBuf[off++] = 0x3a;
6274 pbCodeBuf[off++] = 0x16;
6275 pbCodeBuf[off++] = 0xeb;
6276 pbCodeBuf[off++] = 0x00;
6277
6278 /* cmp tmp0, 0xffffffffffffffff. */
6279 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6280 pbCodeBuf[off++] = 0x83;
6281 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6282 pbCodeBuf[off++] = 0xff;
6283
6284 /* je/jz +1 */
6285 pbCodeBuf[off++] = 0x74;
6286 pbCodeBuf[off++] = 0x01;
6287
6288 /* int3 */
6289 pbCodeBuf[off++] = 0xcc;
6290
6291 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6292 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6293 pbCodeBuf[off++] = X86_OP_REX_W
6294 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6295 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6296 pbCodeBuf[off++] = 0x0f;
6297 pbCodeBuf[off++] = 0x3a;
6298 pbCodeBuf[off++] = 0x16;
6299 pbCodeBuf[off++] = 0xeb;
6300 pbCodeBuf[off++] = 0x01;
6301
6302 /* cmp tmp0, 0xffffffffffffffff. */
6303 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6304 pbCodeBuf[off++] = 0x83;
6305 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6306 pbCodeBuf[off++] = 0xff;
6307
6308 /* je/jz +1 */
6309 pbCodeBuf[off++] = 0x74;
6310 pbCodeBuf[off++] = 0x01;
6311
6312 /* int3 */
6313 pbCodeBuf[off++] = 0xcc;
6314
6315 return off;
6316}
6317# endif
6318
6319
6320/**
6321 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6322 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6323 * instruction if that's not the case.
6324 *
6325 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6326 * Trashes EFLAGS on AMD64.
6327 */
6328DECL_HIDDEN_THROW(uint32_t)
6329iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6330 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6331{
6332    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6333 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6334 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6335 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6336 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6337 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6338 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6339 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6340 return off;
6341
6342# ifdef RT_ARCH_AMD64
6343 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6344 {
6345 /* movdqa vectmp0, idxSimdReg */
6346 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6347
6348 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6349
6350 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6351 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6352 }
6353
6354 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6355 {
6356        /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6357 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6358
6359 /* vextracti128 vectmp0, idxSimdReg, 1 */
6360 pbCodeBuf[off++] = X86_OP_VEX3;
6361 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6362 | X86_OP_VEX3_BYTE1_X
6363 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6364 | 0x03; /* Opcode map */
6365 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6366 pbCodeBuf[off++] = 0x39;
6367 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6368 pbCodeBuf[off++] = 0x01;
6369
6370 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6371 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6372 }
6373# elif defined(RT_ARCH_ARM64)
6374 /* mov vectmp0, [gstreg] */
6375 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6376
6377 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6378 {
6379 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6380 /* eor vectmp0, vectmp0, idxSimdReg */
6381 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6382 /* cnt vectmp0, vectmp0, #0*/
6383 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6384 /* umov tmp0, vectmp0.D[0] */
6385 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6386 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6387 /* cbz tmp0, +1 */
6388 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6389 /* brk #0x1000+enmGstReg */
6390 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6391 }
6392
6393 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6394 {
6395 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6396 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6397 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6398 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6399 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6400 /* umov tmp0, (vectmp0 + 1).D[0] */
6401 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6402 0 /*idxElem*/, kArmv8InstrUmovInsSz_U64);
6403 /* cbz tmp0, +1 */
6404 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6405 /* brk #0x1000+enmGstReg */
6406 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6407 }
6408
6409# else
6410# error "Port me!"
6411# endif
6412
6413 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6414 return off;
6415}
6416# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6417
6418
6419/**
6420 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6421 * important bits.
6422 *
6423 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6424 * Trashes EFLAGS on AMD64.
6425 */
6426DECL_HIDDEN_THROW(uint32_t)
6427iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6428{
6429 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6430 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6431 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6432 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6433
6434#ifdef RT_ARCH_AMD64
6435 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6436
6437 /* je/jz +1 */
6438 pbCodeBuf[off++] = 0x74;
6439 pbCodeBuf[off++] = 0x01;
6440
6441 /* int3 */
6442 pbCodeBuf[off++] = 0xcc;
6443
6444# elif defined(RT_ARCH_ARM64)
6445 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6446
6447 /* b.eq +1 */
6448 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6449 /* brk #0x2000 */
6450 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6451
6452# else
6453# error "Port me!"
6454# endif
6455 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6456
6457 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6458 return off;
6459}
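
/*
 * Conceptual C equivalent of the check emitted above (illustrative only):
 *      if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
 *          != (fExec & IEMTB_F_KEY_MASK))
 *          RT_BREAKPOINT();
 */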
6460
6461#endif /* VBOX_STRICT */
6462
6463
6464#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6465/**
6466 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6467 */
6468DECL_HIDDEN_THROW(uint32_t)
6469iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6470{
6471 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6472
6473 fEflNeeded &= X86_EFL_STATUS_BITS;
6474 if (fEflNeeded)
6475 {
6476# ifdef RT_ARCH_AMD64
6477 /* test dword [pVCpu + offVCpu], imm32 */
6478 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6479 if (fEflNeeded <= 0xff)
6480 {
6481 pCodeBuf[off++] = 0xf6;
6482 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6483 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6484 }
6485 else
6486 {
6487 pCodeBuf[off++] = 0xf7;
6488 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6489 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6490 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6491 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6492 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6493 }
6494 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6495
6496# else
6497 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6498 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6499 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6500# ifdef RT_ARCH_ARM64
6501 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6502 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6503# else
6504# error "Port me!"
6505# endif
6506 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6507# endif
6508 }
6509 return off;
6510}
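
/*
 * Conceptual C equivalent of the strict check above (illustrative only):
 *      if (pVCpu->iem.s.fSkippingEFlags & fEflNeeded & X86_EFL_STATUS_BITS)
 *          RT_BREAKPOINT();
 */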
6511#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6512
6513
6514/**
6515 * Emits code for checking the return code of a call and rcPassUp, returning
6516 * from the code if either is non-zero.
6517 */
6518DECL_HIDDEN_THROW(uint32_t)
6519iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6520{
6521#ifdef RT_ARCH_AMD64
6522 /*
6523 * AMD64: eax = call status code.
6524 */
6525
6526 /* edx = rcPassUp */
6527 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6528# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6529 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6530# endif
6531
6532 /* edx = eax | rcPassUp */
6533 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6534 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6535 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6536 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6537
6538 /* Jump to non-zero status return path. */
6539 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6540
6541 /* done. */
6542
6543#elif RT_ARCH_ARM64
6544 /*
6545 * ARM64: w0 = call status code.
6546 */
6547# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6548 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6549# endif
6550 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6551
6552 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6553
6554 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6555
6556 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6557 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6558 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6559
6560#else
6561# error "port me"
6562#endif
6563 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6564 RT_NOREF_PV(idxInstr);
6565 return off;
6566}
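
/*
 * Conceptual C equivalent of the emitted code (illustrative only): the call
 * status is OR'ed with IEMCPU::rcPassUp and we branch off on a non-zero
 * result, i.e. roughly:
 *      if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;
 */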
6567
6568
6569/**
6570 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6571 * raising a \#GP(0) if it isn't.
6572 *
6573 * @returns New code buffer offset, UINT32_MAX on failure.
6574 * @param pReNative The native recompile state.
6575 * @param off The code buffer offset.
6576 * @param idxAddrReg The host register with the address to check.
6577 * @param idxInstr The current instruction.
6578 */
6579DECL_HIDDEN_THROW(uint32_t)
6580iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6581{
6582 /*
6583 * Make sure we don't have any outstanding guest register writes as we may
6584     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6585 */
6586 off = iemNativeRegFlushPendingWrites(pReNative, off);
6587
6588#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6589 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6590#else
6591 RT_NOREF(idxInstr);
6592#endif
6593
6594#ifdef RT_ARCH_AMD64
6595 /*
6596 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6597 * return raisexcpt();
6598     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6599 */
6600 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6601
6602 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6603 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6604 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6605 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6606 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6607
6608 iemNativeRegFreeTmp(pReNative, iTmpReg);
6609
6610#elif defined(RT_ARCH_ARM64)
6611 /*
6612 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6613 * return raisexcpt();
6614 * ----
6615 * mov x1, 0x800000000000
6616 * add x1, x0, x1
6617 * cmp xzr, x1, lsr 48
6618 * b.ne .Lraisexcpt
6619 */
6620 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6621
6622 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6623 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6624 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6625 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6626
6627 iemNativeRegFreeTmp(pReNative, iTmpReg);
6628
6629#else
6630# error "Port me"
6631#endif
6632 return off;
6633}
6634
6635
6636/**
6637 * Emits code to check that the content of @a idxAddrReg is within the limit
6638 * of CS, raising a \#GP(0) if it isn't.
6639 *
6640 * @returns New code buffer offset; throws VBox status code on error.
6641 * @param pReNative The native recompile state.
6642 * @param off The code buffer offset.
6643 * @param idxAddrReg The host register (32-bit) with the address to
6644 * check.
6645 * @param idxInstr The current instruction.
6646 */
6647DECL_HIDDEN_THROW(uint32_t)
6648iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6649 uint8_t idxAddrReg, uint8_t idxInstr)
6650{
6651 /*
6652 * Make sure we don't have any outstanding guest register writes as we may
6653     * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6654 */
6655 off = iemNativeRegFlushPendingWrites(pReNative, off);
6656
6657#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6658 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6659#else
6660 RT_NOREF(idxInstr);
6661#endif
6662
6663 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6664 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6665 kIemNativeGstRegUse_ReadOnly);
6666
6667 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6668 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6669
6670 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6671 return off;
6672}
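
/*
 * Conceptual C equivalent of the emitted check (illustrative only; uAddr
 * stands for the 32-bit value held in @a idxAddrReg):
 *      if (uAddr > pVCpu->cpum.GstCtx.cs.u32Limit)
 *          goto RaiseGp0;
 */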
6673
6674
6675/**
6676 * Emits a call to a CImpl function or something similar.
6677 */
6678DECL_HIDDEN_THROW(uint32_t)
6679iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6680 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6681{
6682 /* Writeback everything. */
6683 off = iemNativeRegFlushPendingWrites(pReNative, off);
6684
6685 /*
6686     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6687 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6688 */
6689 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6690 fGstShwFlush
6691 | RT_BIT_64(kIemNativeGstReg_Pc)
6692 | RT_BIT_64(kIemNativeGstReg_EFlags));
6693 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6694
6695 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6696
6697 /*
6698 * Load the parameters.
6699 */
6700#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6701    /* Special handling of the hidden VBOXSTRICTRC pointer. */
6702 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6703 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6704 if (cAddParams > 0)
6705 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6706 if (cAddParams > 1)
6707 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6708 if (cAddParams > 2)
6709 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6710 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6711
6712#else
6713 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6714 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6715 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6716 if (cAddParams > 0)
6717 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6718 if (cAddParams > 1)
6719 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6720 if (cAddParams > 2)
6721# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6722 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6723# else
6724 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6725# endif
6726#endif
6727
6728 /*
6729 * Make the call.
6730 */
6731 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6732
6733#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6734 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6735#endif
6736
6737 /*
6738 * Check the status code.
6739 */
6740 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6741}
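
/*
 * Illustrative call shape (not compiled; pfnCImpl, cbInstr, uParam0 and
 * uParam1 are placeholders): defer an instruction taking two extra arguments
 * to a CImpl worker without requesting any additional guest shadow flushing.
 */
#if 0
off = iemNativeEmitCImplCall(pReNative, off, idxInstr, 0 /*fGstShwFlush*/, (uintptr_t)pfnCImpl,
                             cbInstr, 2 /*cAddParams*/, uParam0, uParam1, 0 /*uParam2*/);
#endif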
6742
6743
6744/**
6745 * Emits a call to a threaded worker function.
6746 */
6747DECL_HIDDEN_THROW(uint32_t)
6748iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6749{
6750 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6751
6752 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6753 off = iemNativeRegFlushPendingWrites(pReNative, off);
6754
6755 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6756 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6757
6758#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6759 /* The threaded function may throw / long jmp, so set current instruction
6760 number if we're counting. */
6761 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6762#endif
6763
6764 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6765
6766#ifdef RT_ARCH_AMD64
6767 /* Load the parameters and emit the call. */
6768# ifdef RT_OS_WINDOWS
6769# ifndef VBOXSTRICTRC_STRICT_ENABLED
6770 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6771 if (cParams > 0)
6772 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6773 if (cParams > 1)
6774 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6775 if (cParams > 2)
6776 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6777# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6778 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6779 if (cParams > 0)
6780 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6781 if (cParams > 1)
6782 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6783 if (cParams > 2)
6784 {
6785 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6786 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6787 }
6788 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6789# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6790# else
6791 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6792 if (cParams > 0)
6793 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6794 if (cParams > 1)
6795 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6796 if (cParams > 2)
6797 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6798# endif
6799
6800 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6801
6802# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6803 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6804# endif
6805
6806#elif RT_ARCH_ARM64
6807 /*
6808 * ARM64:
6809 */
6810 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6811 if (cParams > 0)
6812 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6813 if (cParams > 1)
6814 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6815 if (cParams > 2)
6816 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6817
6818 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6819
6820#else
6821# error "port me"
6822#endif
6823
6824 /*
6825 * Check the status code.
6826 */
6827 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6828
6829 return off;
6830}
6831
6832#ifdef VBOX_WITH_STATISTICS
6833/**
6834 * Emits code to update the threaded function call statistics.
6835 */
6836DECL_INLINE_THROW(uint32_t)
6837iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6838{
6839 /*
6840 * Update threaded function stats.
6841 */
6842 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6843 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6844# if defined(RT_ARCH_ARM64)
6845 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6846 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6847 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6848 iemNativeRegFreeTmp(pReNative, idxTmp1);
6849 iemNativeRegFreeTmp(pReNative, idxTmp2);
6850# else
6851 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6852# endif
6853 return off;
6854}
6855#endif /* VBOX_WITH_STATISTICS */
6856
6857
6858/**
6859 * Emits the code at the ReturnWithFlags label (returns
6860 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6861 */
6862static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6863{
6864 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6865 if (idxLabel != UINT32_MAX)
6866 {
6867 iemNativeLabelDefine(pReNative, idxLabel, off);
6868
6869 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6870
6871 /* jump back to the return sequence. */
6872 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6873 }
6874 return off;
6875}
6876
6877
6878/**
6879 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6880 */
6881static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6882{
6883 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6884 if (idxLabel != UINT32_MAX)
6885 {
6886 iemNativeLabelDefine(pReNative, idxLabel, off);
6887
6888 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6889
6890 /* jump back to the return sequence. */
6891 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6892 }
6893 return off;
6894}
6895
6896
6897/**
6898 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6899 */
6900static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6901{
6902 /*
6903 * Generate the rc + rcPassUp fiddling code if needed.
6904 */
6905 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6906 if (idxLabel != UINT32_MAX)
6907 {
6908 iemNativeLabelDefine(pReNative, idxLabel, off);
6909
6910 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6911#ifdef RT_ARCH_AMD64
6912# ifdef RT_OS_WINDOWS
6913# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6914 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6915# endif
6916 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6917 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6918# else
6919 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6920 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6921# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6922 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6923# endif
6924# endif
6925# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6926 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6927# endif
6928
6929#else
6930 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6931 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6932 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6933#endif
6934
6935 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6936 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6937 }
6938 return off;
6939}
6940
6941
6942/**
6943 * Emits a standard epilog.
6944 */
6945static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6946{
6947 *pidxReturnLabel = UINT32_MAX;
6948
6949 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
6950 off = iemNativeRegFlushPendingWrites(pReNative, off);
6951
6952 /*
6953 * Successful return, so clear the return register (eax, w0).
6954 */
6955    off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6956
6957 /*
6958 * Define label for common return point.
6959 */
6960 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6961 *pidxReturnLabel = idxReturn;
6962
6963 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6964
6965 /*
6966 * Restore registers and return.
6967 */
6968#ifdef RT_ARCH_AMD64
6969 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6970
6971    /* Reposition rsp at the r15 restore point. */
6972 pbCodeBuf[off++] = X86_OP_REX_W;
6973 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6974 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6975 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6976
6977 /* Pop non-volatile registers and return */
6978 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6979 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6980 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6981 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6982 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6983 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6984 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6985 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6986# ifdef RT_OS_WINDOWS
6987 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6988 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6989# endif
6990 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6991 pbCodeBuf[off++] = 0xc9; /* leave */
6992 pbCodeBuf[off++] = 0xc3; /* ret */
6993 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6994
6995#elif RT_ARCH_ARM64
6996 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6997
6998    /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6999 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7000 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7001 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7002 IEMNATIVE_FRAME_VAR_SIZE / 8);
7003 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7004 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7005 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7006 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7007 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7008 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7009 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7010 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7011 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7012 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7013 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7014 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7015
7016 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7017 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7018 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7019 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7020
7021 /* retab / ret */
7022# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7023 if (1)
7024 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7025 else
7026# endif
7027 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7028
7029#else
7030# error "port me"
7031#endif
7032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7033
7034 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7035}
7036
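/*
 * A rough sketch of the instruction stream the AMD64 path above produces
 * (illustrative only, matching the byte emits above):
 *
 *      lea     rsp, [rbp - (gcc ? 5 : 7) * 8]  ; back to the r15 save slot
 *      pop     r15
 *      pop     r14
 *      pop     r13
 *      pop     r12
 *      pop     rdi                             ; Windows only
 *      pop     rsi                             ; Windows only
 *      pop     rbx
 *      leave
 *      ret
 *      int3                                    ; poison
 */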
7037
7038/**
7039 * Emits a standard prolog.
7040 */
7041static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7042{
7043#ifdef RT_ARCH_AMD64
7044 /*
7045 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7046 * reserving 64 bytes for stack variables plus 4 non-register argument
7047 * slots. Fixed register assignment: xBX = pVCpu;
7048 *
7049 * Since we always do the same register spilling, we can use the same
7050 * unwind description for all the code.
7051 */
7052 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7053 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7054 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7055 pbCodeBuf[off++] = 0x8b;
7056 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7057 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7058 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7059# ifdef RT_OS_WINDOWS
7060 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7061 pbCodeBuf[off++] = 0x8b;
7062 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7063 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7064 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7065# else
7066 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7067 pbCodeBuf[off++] = 0x8b;
7068 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7069# endif
7070 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7071 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7072 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7073 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7074 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7075 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7076 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7077 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7078
7079# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7080 /* Save the frame pointer. */
7081 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7082# endif
7083
7084 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7085 X86_GREG_xSP,
7086 IEMNATIVE_FRAME_ALIGN_SIZE
7087 + IEMNATIVE_FRAME_VAR_SIZE
7088 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7089 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7090 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7091 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7092 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7093
7094#elif RT_ARCH_ARM64
7095 /*
7096 * We set up a stack frame exactly like on x86, only we have to push the
7097 * return address ourselves here. We save all non-volatile registers.
7098 */
7099 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7100
7101# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
7102 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7103 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
7104 * in any way conditional, so just emit this instruction now and hope for the best... */
7105 /* pacibsp */
7106 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7107# endif
7108
7109 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
7110 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7111 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7112 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7113 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7114 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7115 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7116 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7117 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7118 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7119 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7120 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7121 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7122 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7123 /* Save the BP and LR (ret address) registers at the top of the frame. */
7124 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7125 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7126 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7127 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7128 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7129 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7130
7131 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7132 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7133
7134 /* mov r28, r0 */
7135 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7136 /* mov r27, r1 */
7137 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7138
7139# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7140 /* Save the frame pointer. */
7141 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7142 ARMV8_A64_REG_X2);
7143# endif
7144
7145#else
7146# error "port me"
7147#endif
7148 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7149 return off;
7150}
7151
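/*
 * A rough sketch of the AMD64 frame this sets up (derived from the pushes
 * above; IEMNATIVE_FP_OFF_LAST_PUSH is the offset of the last saved GPR):
 *
 *      rbp+08h: return address
 *      rbp+00h: caller's rbp
 *      rbp-08h: saved rbx
 *      rbp-10h: saved rsi (Windows) / saved r12 (SysV)
 *      rbp-18h: saved rdi (Windows) / saved r13 (SysV)
 *      ...      the remaining non-volatile GPRs, the last push (r15) ending up
 *               at rbp-28h (SysV) resp. rbp-38h (Windows)
 *      below:   IEMNATIVE_FRAME_ALIGN_SIZE + IEMNATIVE_FRAME_VAR_SIZE plus the
 *               stack and shadow argument slots, down to the final rsp.
 */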
7152
7153/*********************************************************************************************************************************
7154* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7155*********************************************************************************************************************************/
7156
7157/**
7158 * Internal worker that allocates a variable with kind set to
7159 * kIemNativeVarKind_Invalid and no current stack allocation.
7160 *
7161 * The kind will either be set by the caller or later when the variable is first
7162 * assigned a value.
7163 *
7164 * @returns Unpacked index.
7165 * @internal
7166 */
7167static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7168{
7169 Assert(cbType > 0 && cbType <= 64);
7170 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7171 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7172 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7173 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7174 pReNative->Core.aVars[idxVar].cbVar = cbType;
7175 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7176 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7177 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7178 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7179 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7180 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7181 pReNative->Core.aVars[idxVar].u.uValue = 0;
7182#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7183 pReNative->Core.aVars[idxVar].fSimdReg = false;
7184#endif
7185 return idxVar;
7186}
7187
7188
7189/**
7190 * Internal worker that allocates an argument variable w/o setting enmKind.
7191 *
7192 * @returns Unpacked index.
7193 * @internal
7194 */
7195static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7196{
7197 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7198 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7199 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7200
7201 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7202 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7203 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7204 return idxVar;
7205}
7206
7207
7208/**
7209 * Gets the stack slot for a stack variable, allocating one if necessary.
7210 *
7211 * Calling this function implies that the stack slot will contain a valid
7212 * variable value. The caller deals with any register currently assigned to the
7213 * variable, typically by spilling it into the stack slot.
7214 *
7215 * @returns The stack slot number.
7216 * @param pReNative The recompiler state.
7217 * @param idxVar The variable.
7218 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7219 */
7220DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7221{
7222 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7223 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7224 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7225
7226 /* Already got a slot? */
7227 uint8_t const idxStackSlot = pVar->idxStackSlot;
7228 if (idxStackSlot != UINT8_MAX)
7229 {
7230 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7231 return idxStackSlot;
7232 }
7233
7234 /*
7235 * A single slot is easy to allocate.
7236 * Allocate them from the top end, closest to BP, to reduce the displacement.
7237 */
7238 if (pVar->cbVar <= sizeof(uint64_t))
7239 {
7240 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7241 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7242 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7243 pVar->idxStackSlot = (uint8_t)iSlot;
7244 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7245 return (uint8_t)iSlot;
7246 }
7247
7248 /*
7249 * We need more than one stack slot.
7250 *
7251 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7252 */
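    /* Worked example of the two mask formulas below: for a 32 byte variable
       fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = 3, so the candidate
       slot index is rounded down to a multiple of 4, and
       fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf, i.e. four consecutive
       slots get reserved in bmStack. */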
7253 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7254 Assert(pVar->cbVar <= 64);
7255 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7256 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7257 uint32_t bmStack = pReNative->Core.bmStack;
7258 while (bmStack != UINT32_MAX)
7259 {
7260 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7261 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7262 iSlot = (iSlot - 1) & ~fBitAlignMask;
7263 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7264 {
7265 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7266 pVar->idxStackSlot = (uint8_t)iSlot;
7267 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7268 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7269 return (uint8_t)iSlot;
7270 }
7271
7272 bmStack |= (fBitAllocMask << iSlot);
7273 }
7274 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7275}
7276
7277
7278/**
7279 * Changes the variable to a stack variable.
7280 *
7281 * Currently this is only possible to do the first time the variable is used;
7282 * switching later can be implemented but hasn't been done.
7283 *
7284 * @param pReNative The recompiler state.
7285 * @param idxVar The variable.
7286 * @throws VERR_IEM_VAR_IPE_2
7287 */
7288DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7289{
7290 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7291 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7292 if (pVar->enmKind != kIemNativeVarKind_Stack)
7293 {
7294 /* We could in theory transition from immediate to stack as well, but it
7295 would involve the caller doing work storing the value on the stack. So,
7296 till that's required we only allow transition from invalid. */
7297 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7298 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7299 pVar->enmKind = kIemNativeVarKind_Stack;
7300
7301 /* Note! We don't allocate a stack slot here, that's only done when a
7302 slot is actually needed to hold a variable value. */
7303 }
7304}
7305
7306
7307/**
7308 * Sets the variable to a constant (immediate) value.
7309 *
7310 * This does not require stack storage as we know the value and can always
7311 * reload it, unless of course it's referenced.
7312 *
7313 * @param pReNative The recompiler state.
7314 * @param idxVar The variable.
7315 * @param uValue The immediate value.
7316 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7317 */
7318DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7319{
7320 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7321 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7322 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7323 {
7324 /* Only simple transitions for now. */
7325 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7326 pVar->enmKind = kIemNativeVarKind_Immediate;
7327 }
7328 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7329
7330 pVar->u.uValue = uValue;
7331 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7332 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7333 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7334}
7335
7336
7337/**
7338 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7339 *
7340 * This does not require stack storage as we know the value and can always
7341 * reload it. Loading is postponed till needed.
7342 *
7343 * @param pReNative The recompiler state.
7344 * @param idxVar The variable. Unpacked.
7345 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7346 *
7347 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7348 * @internal
7349 */
7350static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7351{
7352 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7353 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7354
7355 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7356 {
7357 /* Only simple transitions for now. */
7358 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7360 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7361 }
7362 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7363
7364 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7365
7366 /* Update the other variable, ensure it's a stack variable. */
7367 /** @todo handle variables with const values... that'll go boom now. */
7368 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7369 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7370}
7371
7372
7373/**
7374 * Sets the variable to a reference (pointer) to a guest register reference.
7375 *
7376 * This does not require stack storage as we know the value and can always
7377 * reload it. Loading is postponed till needed.
7378 *
7379 * @param pReNative The recompiler state.
7380 * @param idxVar The variable.
7381 * @param enmRegClass The class guest registers to reference.
7382 * @param idxReg The register within @a enmRegClass to reference.
7383 *
7384 * @throws VERR_IEM_VAR_IPE_2
7385 */
7386DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7387 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7388{
7389 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7390 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7391
7392 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7393 {
7394 /* Only simple transitions for now. */
7395 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7396 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7397 }
7398 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7399
7400 pVar->u.GstRegRef.enmClass = enmRegClass;
7401 pVar->u.GstRegRef.idx = idxReg;
7402}
7403
7404
7405DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7406{
7407 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7408}
7409
7410
7411DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7412{
7413 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7414
7415 /* Since we're using a generic uint64_t value type, we must truncate it if
7416 the variable is smaller, otherwise we may end up with a too large value when
7417 scaling up an imm8 w/ sign-extension.
7418
7419 This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7420 in the BIOS, bx=1) when running on arm, because clang expects 16-bit
7421 register parameters to have bits 16 and up set to zero. Instead of
7422 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7423 CF value in the result. */
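    /* Worked example of the masking below: cbType = sizeof(uint16_t) with an
       incoming sign-extended uValue of 0xffffffffffffffff yields 0x000000000000ffff. */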
7424 switch (cbType)
7425 {
7426 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7427 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7428 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7429 }
7430 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7431 return idxVar;
7432}
7433
7434
7435DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7436{
7437 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7438 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7439 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7440 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7441 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7442 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7443
7444 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7445 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7446 return idxArgVar;
7447}
7448
7449
7450DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7451{
7452 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7453 /* Don't set to stack now, leave that to the first use as for instance
7454 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7455 return idxVar;
7456}
7457
7458
7459DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7460{
7461 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7462
7463 /* Since we're using a generic uint64_t value type, we must truncate it if
7464 the variable is smaller, otherwise we may end up with a too large value when
7465 scaling up an imm8 w/ sign-extension. */
7466 switch (cbType)
7467 {
7468 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7469 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7470 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7471 }
7472 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7473 return idxVar;
7474}
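/*
 * Usage sketch (hypothetical caller code; presumably what an IEM_MC_LOCAL_CONST
 * style statement boils down to, not actual recompiler output):
 *
 *      uint8_t const idxVarTmp = iemNativeVarAllocConst(pReNative, sizeof(uint16_t), 0xffff);
 *
 * The value is truncated to the variable size and no stack slot or host
 * register is assigned until the constant is actually needed.
 */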
7475
7476
7477/**
7478 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7479 * fixed till we call iemNativeVarRegisterRelease.
7480 *
7481 * @returns The host register number.
7482 * @param pReNative The recompiler state.
7483 * @param idxVar The variable.
7484 * @param poff Pointer to the instruction buffer offset.
7485 * In case a register needs to be freed up or the value
7486 * loaded off the stack.
7487 * @param fInitialized Set if the variable must already have been initialized.
7488 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7489 * the case.
7490 * @param idxRegPref Preferred register number or UINT8_MAX.
7491 */
7492DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7493 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7494{
7495 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7496 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7497 Assert(pVar->cbVar <= 8);
7498 Assert(!pVar->fRegAcquired);
7499
7500 uint8_t idxReg = pVar->idxReg;
7501 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7502 {
7503 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7504 && pVar->enmKind < kIemNativeVarKind_End);
7505 pVar->fRegAcquired = true;
7506 return idxReg;
7507 }
7508
7509 /*
7510 * If the kind of variable has not yet been set, default to 'stack'.
7511 */
7512 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7513 && pVar->enmKind < kIemNativeVarKind_End);
7514 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7515 iemNativeVarSetKindToStack(pReNative, idxVar);
7516
7517 /*
7518 * We have to allocate a register for the variable, even if it's a stack one,
7519 * as we don't know if there are modifications being made to it before it's
7520 * finalized (todo: analyze and insert hints about that?).
7521 *
7522 * If we can, we try to get the correct register for argument variables. This
7523 * assumes that most argument variables are fetched as close as possible
7524 * to the actual call, so that there aren't any interfering hidden calls
7525 * (memory accesses, etc.) in between.
7526 *
7527 * If we cannot, or it's not an argument variable, we make sure no argument
7528 * registers that will be used by this MC block are allocated here, and we
7529 * always prefer non-volatile registers to avoid needing to spill stuff for
7530 * internal calls.
7531 */
7532 /** @todo Detect too early argument value fetches and warn about hidden
7533 * calls causing less optimal code to be generated in the python script. */
7534
7535 uint8_t const uArgNo = pVar->uArgNo;
7536 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7537 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7538 {
7539 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7540 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7541 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7542 }
7543 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7544 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7545 {
7546 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7547 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7548 & ~pReNative->Core.bmHstRegsWithGstShadow
7549 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7550 & fNotArgsMask;
7551 if (fRegs)
7552 {
7553 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7554 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7555 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7556 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7557 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7558 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7559 }
7560 else
7561 {
7562 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7563 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7564 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7565 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7566 }
7567 }
7568 else
7569 {
7570 idxReg = idxRegPref;
7571 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7572 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7573 }
7574 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7575 pVar->idxReg = idxReg;
7576
7577#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7578 pVar->fSimdReg = false;
7579#endif
7580
7581 /*
7582 * Load it off the stack if we've got a stack slot.
7583 */
7584 uint8_t const idxStackSlot = pVar->idxStackSlot;
7585 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7586 {
7587 Assert(fInitialized);
7588 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7589 switch (pVar->cbVar)
7590 {
7591 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7592 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7593 case 3: AssertFailed(); RT_FALL_THRU();
7594 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7595 default: AssertFailed(); RT_FALL_THRU();
7596 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7597 }
7598 }
7599 else
7600 {
7601 Assert(idxStackSlot == UINT8_MAX);
7602 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7603 }
7604 pVar->fRegAcquired = true;
7605 return idxReg;
7606}
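/*
 * Typical usage sketch (hypothetical caller code; the middle step stands in for
 * whatever instructions the caller emits, and the release call is assumed to
 * take the usual (pReNative, idxVar) pair):
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true);
 *      ... emit native instructions that read or update idxVarReg ...
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 */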
7607
7608
7609#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7610/**
7611 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7612 * fixed till we call iemNativeVarRegisterRelease.
7613 *
7614 * @returns The host register number.
7615 * @param pReNative The recompiler state.
7616 * @param idxVar The variable.
7617 * @param poff Pointer to the instruction buffer offset.
7618 * In case a register needs to be freed up or the value
7619 * loaded off the stack.
7620 * @param fInitialized Set if the variable must already have been initialized.
7621 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7622 * the case.
7623 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7624 */
7625DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7626 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7627{
7628 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7629 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7630 Assert( pVar->cbVar == sizeof(RTUINT128U)
7631 || pVar->cbVar == sizeof(RTUINT256U));
7632 Assert(!pVar->fRegAcquired);
7633
7634 uint8_t idxReg = pVar->idxReg;
7635 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7636 {
7637 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7638 && pVar->enmKind < kIemNativeVarKind_End);
7639 pVar->fRegAcquired = true;
7640 return idxReg;
7641 }
7642
7643 /*
7644 * If the kind of variable has not yet been set, default to 'stack'.
7645 */
7646 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7647 && pVar->enmKind < kIemNativeVarKind_End);
7648 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7649 iemNativeVarSetKindToStack(pReNative, idxVar);
7650
7651 /*
7652 * We have to allocate a register for the variable, even if it's a stack one,
7653 * as we don't know if there are modifications being made to it before it's
7654 * finalized (todo: analyze and insert hints about that?).
7655 *
7656 * If we can, we try to get the correct register for argument variables. This
7657 * assumes that most argument variables are fetched as close as possible
7658 * to the actual call, so that there aren't any interfering hidden calls
7659 * (memory accesses, etc.) in between.
7660 *
7661 * If we cannot, or it's not an argument variable, we make sure no argument
7662 * registers that will be used by this MC block are allocated here, and we
7663 * always prefer non-volatile registers to avoid needing to spill stuff for
7664 * internal calls.
7665 */
7666 /** @todo Detect too early argument value fetches and warn about hidden
7667 * calls causing less optimal code to be generated in the python script. */
7668
7669 uint8_t const uArgNo = pVar->uArgNo;
7670 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7671
7672 /* SIMD is a bit simpler for now because there is no support for arguments. */
7673 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7674 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7675 {
7676 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7677 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7678 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7679 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7680 & fNotArgsMask;
7681 if (fRegs)
7682 {
7683 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7684 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7685 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7686 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7687 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7688 }
7689 else
7690 {
7691 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7692 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7693 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7694 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7695 }
7696 }
7697 else
7698 {
7699 idxReg = idxRegPref;
7700 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7701 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7702 }
7703 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7704
7705 pVar->fSimdReg = true;
7706 pVar->idxReg = idxReg;
7707
7708 /*
7709 * Load it off the stack if we've got a stack slot.
7710 */
7711 uint8_t const idxStackSlot = pVar->idxStackSlot;
7712 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7713 {
7714 Assert(fInitialized);
7715 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7716 switch (pVar->cbVar)
7717 {
7718 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7719 default: AssertFailed(); RT_FALL_THRU();
7720 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7721 }
7722 }
7723 else
7724 {
7725 Assert(idxStackSlot == UINT8_MAX);
7726 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7727 }
7728 pVar->fRegAcquired = true;
7729 return idxReg;
7730}
7731#endif
7732
7733
7734/**
7735 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7736 * guest register.
7737 *
7738 * This function makes sure there is a register for it and sets it to be the
7739 * current shadow copy of @a enmGstReg.
7740 *
7741 * @returns The host register number.
7742 * @param pReNative The recompiler state.
7743 * @param idxVar The variable.
7744 * @param enmGstReg The guest register this variable will be written to
7745 * after this call.
7746 * @param poff Pointer to the instruction buffer offset.
7747 * In case a register needs to be freed up or if the
7748 * variable content needs to be loaded off the stack.
7749 *
7750 * @note We DO NOT expect @a idxVar to be an argument variable,
7751 * because this function is only used in the commit stage of
7752 * an instruction.
7753 */
7754DECL_HIDDEN_THROW(uint8_t)
7755iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7756{
7757 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7758 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7759 Assert(!pVar->fRegAcquired);
7760 AssertMsgStmt( pVar->cbVar <= 8
7761 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7762 || pVar->enmKind == kIemNativeVarKind_Stack),
7763 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7764 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7765 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7766
7767 /*
7768 * This shouldn't ever be used for arguments, unless it's in a weird else
7769 * branch that doesn't do any calling and even then it's questionable.
7770 *
7771 * However, in case someone writes crazy wrong MC code and does register
7772 * updates before making calls, just use the regular register allocator to
7773 * ensure we get a register suitable for the intended argument number.
7774 */
7775 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7776
7777 /*
7778 * If there is already a register for the variable, we transfer/set the
7779 * guest shadow copy assignment to it.
7780 */
7781 uint8_t idxReg = pVar->idxReg;
7782 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7783 {
7784 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7785 {
7786 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7787 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7788 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7789 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7790 }
7791 else
7792 {
7793 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7794 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7795 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7796 }
7797 /** @todo figure this one out. We need some way of making sure the register isn't
7798 * modified after this point, just in case we start writing crappy MC code. */
7799 pVar->enmGstReg = enmGstReg;
7800 pVar->fRegAcquired = true;
7801 return idxReg;
7802 }
7803 Assert(pVar->uArgNo == UINT8_MAX);
7804
7805 /*
7806 * Because this is supposed to be the commit stage, we just tag along with the
7807 * temporary register allocator and upgrade it to a variable register.
7808 */
7809 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7810 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7811 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7812 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7813 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7814 pVar->idxReg = idxReg;
7815
7816 /*
7817 * Now we need to load the register value.
7818 */
7819 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7820 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7821 else
7822 {
7823 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7824 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7825 switch (pVar->cbVar)
7826 {
7827 case sizeof(uint64_t):
7828 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7829 break;
7830 case sizeof(uint32_t):
7831 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7832 break;
7833 case sizeof(uint16_t):
7834 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7835 break;
7836 case sizeof(uint8_t):
7837 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7838 break;
7839 default:
7840 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7841 }
7842 }
7843
7844 pVar->fRegAcquired = true;
7845 return idxReg;
7846}
7847
7848
7849/**
7850 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7851 *
7852 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7853 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7854 * requirement of flushing anything in volatile host registers when making a
7855 * call.
7856 *
7857 * @returns New @a off value.
7858 * @param pReNative The recompiler state.
7859 * @param off The code buffer position.
7860 * @param fHstRegsNotToSave Set of registers not to save & restore.
7861 */
7862DECL_HIDDEN_THROW(uint32_t)
7863iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7864{
7865 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
7866 if (fHstRegs)
7867 {
7868 do
7869 {
7870 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7871 fHstRegs &= ~RT_BIT_32(idxHstReg);
7872
7873 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7874 {
7875 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7876 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7877 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7878 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7879 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7880 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7881 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7882 {
7883 case kIemNativeVarKind_Stack:
7884 {
7885 /* Temporarily spill the variable register. */
7886 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7887 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7888 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7889 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7890 continue;
7891 }
7892
7893 case kIemNativeVarKind_Immediate:
7894 case kIemNativeVarKind_VarRef:
7895 case kIemNativeVarKind_GstRegRef:
7896 /* It is weird to have any of these loaded at this point. */
7897 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7898 continue;
7899
7900 case kIemNativeVarKind_End:
7901 case kIemNativeVarKind_Invalid:
7902 break;
7903 }
7904 AssertFailed();
7905 }
7906 else
7907 {
7908 /*
7909 * Allocate a temporary stack slot and spill the register to it.
7910 */
7911 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7912 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7913 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7914 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7915 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7916 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7917 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7918 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7919 }
7920 } while (fHstRegs);
7921 }
7922#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7923 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7924 if (fHstRegs)
7925 {
7926 do
7927 {
7928 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7929 fHstRegs &= ~RT_BIT_32(idxHstReg);
7930
7931 /*
7932 * Guest registers are flushed to CPUMCTX at the moment and don't need a stack slot
7933 * allocated, which would be more difficult anyway since they span multiple stack
7934 * slots and come in different sizes (besides, we only have a limited number of
7935 * slots at the moment). Fixed temporary registers don't need saving.
7936 */
7937 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7938 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
7939 continue;
7940
7941 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7942
7943 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7944 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7945 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7946 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7947 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7948 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7949 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7950 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7951 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7952 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7953 {
7954 case kIemNativeVarKind_Stack:
7955 {
7956 /* Temporarily spill the variable register. */
7957 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7958 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7959 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7960 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7961 if (cbVar == sizeof(RTUINT128U))
7962 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7963 else
7964 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7965 continue;
7966 }
7967
7968 case kIemNativeVarKind_Immediate:
7969 case kIemNativeVarKind_VarRef:
7970 case kIemNativeVarKind_GstRegRef:
7971 /* It is weird to have any of these loaded at this point. */
7972 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7973 continue;
7974
7975 case kIemNativeVarKind_End:
7976 case kIemNativeVarKind_Invalid:
7977 break;
7978 }
7979 AssertFailed();
7980 } while (fHstRegs);
7981 }
7982#endif
7983 return off;
7984}
7985
7986
7987/**
7988 * Emit code to restore volatile registers after a call to a helper.
7989 *
7990 * @returns New @a off value.
7991 * @param pReNative The recompiler state.
7992 * @param off The code buffer position.
7993 * @param fHstRegsNotToSave Set of registers not to save & restore.
7994 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7995 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7996 */
7997DECL_HIDDEN_THROW(uint32_t)
7998iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7999{
8000 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8001 if (fHstRegs)
8002 {
8003 do
8004 {
8005 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8006 fHstRegs &= ~RT_BIT_32(idxHstReg);
8007
8008 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8009 {
8010 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8011 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8012 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8013 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8014 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8015 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8016 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8017 {
8018 case kIemNativeVarKind_Stack:
8019 {
8020 /* Unspill the variable register. */
8021 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8022 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8023 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8024 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8025 continue;
8026 }
8027
8028 case kIemNativeVarKind_Immediate:
8029 case kIemNativeVarKind_VarRef:
8030 case kIemNativeVarKind_GstRegRef:
8031 /* It is weird to have any of these loaded at this point. */
8032 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8033 continue;
8034
8035 case kIemNativeVarKind_End:
8036 case kIemNativeVarKind_Invalid:
8037 break;
8038 }
8039 AssertFailed();
8040 }
8041 else
8042 {
8043 /*
8044 * Restore from temporary stack slot.
8045 */
8046 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8047 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8048 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8049 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8050
8051 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8052 }
8053 } while (fHstRegs);
8054 }
8055#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8056 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8057 if (fHstRegs)
8058 {
8059 do
8060 {
8061 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8062 fHstRegs &= ~RT_BIT_32(idxHstReg);
8063
8064 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8065 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8066 continue;
8067 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8068
8069 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8070 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8071 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8072 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8073 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8074 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8075 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8076 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8077 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8078 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8079 {
8080 case kIemNativeVarKind_Stack:
8081 {
8082 /* Unspill the variable register. */
8083 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8084 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8085 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8086 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8087
8088 if (cbVar == sizeof(RTUINT128U))
8089 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8090 else
8091 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8092 continue;
8093 }
8094
8095 case kIemNativeVarKind_Immediate:
8096 case kIemNativeVarKind_VarRef:
8097 case kIemNativeVarKind_GstRegRef:
8098 /* It is weird to have any of these loaded at this point. */
8099 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8100 continue;
8101
8102 case kIemNativeVarKind_End:
8103 case kIemNativeVarKind_Invalid:
8104 break;
8105 }
8106 AssertFailed();
8107 } while (fHstRegs);
8108 }
8109#endif
8110 return off;
8111}
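/*
 * Sketch of the save/call/restore bracket these two helpers form (hypothetical
 * caller code; the middle step stands in for loading the argument registers and
 * emitting the actual call to the TLB miss helper):
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      ... load argument registers and emit the helper call ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */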
8112
8113
8114/**
8115 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8116 *
8117 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8118 *
8119 * ASSUMES that @a idxVar is valid and unpacked.
8120 */
8121DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8122{
8123 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8124 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8125 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8126 {
8127 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8128 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8129 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
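        /* E.g. cbVar = 32 gives cSlots = 4 and fAllocMask = 0xf, mirroring what
           iemNativeVarGetStackSlot reserved for the variable. */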
8130 Assert(cSlots > 0);
8131 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8132 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8133 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8134 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8135 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8136 }
8137 else
8138 Assert(idxStackSlot == UINT8_MAX);
8139}
8140
8141
8142/**
8143 * Worker that frees a single variable.
8144 *
8145 * ASSUMES that @a idxVar is valid and unpacked.
8146 */
8147DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8148{
8149 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8150 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8151 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8152
8153 /* Free the host register first if any assigned. */
8154 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8155#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8156 if ( idxHstReg != UINT8_MAX
8157 && pReNative->Core.aVars[idxVar].fSimdReg)
8158 {
8159 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8160 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8161 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8162 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8163 }
8164 else
8165#endif
8166 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8167 {
8168 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8169 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8170 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8171 }
8172
8173 /* Free argument mapping. */
8174 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8175 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8176 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8177
8178 /* Free the stack slots. */
8179 iemNativeVarFreeStackSlots(pReNative, idxVar);
8180
8181 /* Free the actual variable. */
8182 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8183 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8184}
8185
8186
8187/**
8188 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8189 */
8190DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8191{
8192 while (bmVars != 0)
8193 {
8194 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8195 bmVars &= ~RT_BIT_32(idxVar);
8196
8197#if 1 /** @todo optimize by simplifying this later... */
8198 iemNativeVarFreeOneWorker(pReNative, idxVar);
8199#else
8200 /* Only need to free the host register, the rest is done as bulk updates below. */
8201 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8202 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8203 {
8204 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8205 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8206 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8207 }
8208#endif
8209 }
8210#if 0 /** @todo optimize by simplifying this later... */
8211 pReNative->Core.bmVars = 0;
8212 pReNative->Core.bmStack = 0;
8213 pReNative->Core.u64ArgVars = UINT64_MAX;
8214#endif
8215}
8216
8217
8218
8219/*********************************************************************************************************************************
8220* Emitters for IEM_MC_CALL_CIMPL_XXX *
8221*********************************************************************************************************************************/
8222
8223/**
8224 * Emits code to load a reference to the given guest register into @a idxGprDst.
8225 */
8226DECL_HIDDEN_THROW(uint32_t)
8227iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8228 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8229{
8230#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8231 /** @todo If we're ever gonna allow referencing the RIP register, we need to update the guest value here. */
8232#endif
8233
8234 /*
8235 * Get the offset relative to the CPUMCTX structure.
8236 */
8237 uint32_t offCpumCtx;
8238 switch (enmClass)
8239 {
8240 case kIemNativeGstRegRef_Gpr:
8241 Assert(idxRegInClass < 16);
8242 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8243 break;
8244
8245 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8246 Assert(idxRegInClass < 4);
8247 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8248 break;
8249
8250 case kIemNativeGstRegRef_EFlags:
8251 Assert(idxRegInClass == 0);
8252 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8253 break;
8254
8255 case kIemNativeGstRegRef_MxCsr:
8256 Assert(idxRegInClass == 0);
8257 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8258 break;
8259
8260 case kIemNativeGstRegRef_FpuReg:
8261 Assert(idxRegInClass < 8);
8262 AssertFailed(); /** @todo what kind of indexing? */
8263 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8264 break;
8265
8266 case kIemNativeGstRegRef_MReg:
8267 Assert(idxRegInClass < 8);
8268 AssertFailed(); /** @todo what kind of indexing? */
8269 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8270 break;
8271
8272 case kIemNativeGstRegRef_XReg:
8273 Assert(idxRegInClass < 16);
8274 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8275 break;
8276
8277 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8278 Assert(idxRegInClass == 0);
8279 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8280 break;
8281
8282 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8283 Assert(idxRegInClass == 0);
8284 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8285 break;
8286
8287 default:
8288 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8289 }
8290
8291 /*
8292 * Load the value into the destination register.
8293 */
8294#ifdef RT_ARCH_AMD64
8295 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8296
8297#elif defined(RT_ARCH_ARM64)
8298 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8299 Assert(offCpumCtx < 4096);
8300 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8301
8302#else
8303# error "Port me!"
8304#endif
8305
8306 return off;
8307}
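/*
 * Example of the result (a sketch based on the cases above): for
 * enmClass=kIemNativeGstRegRef_Gpr and idxRegInClass=3 the AMD64 path emits roughly
 *      lea     idxGprDst, [rbx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3])]
 * with rbx being the fixed pVCpu register, while the ARM64 path emits
 *      add     idxGprDst, x27, #RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[3])
 * with x27 being IEMNATIVE_REG_FIXED_PCPUMCTX.
 */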
8308
8309
8310/**
8311 * Common code for CIMPL and AIMPL calls.
8312 *
8313 * These are calls that use argument variables and such. They should not be
8314 * confused with internal calls required to implement an MC operation,
8315 * like a TLB load and similar.
8316 *
8317 * Upon return all that is left to do is to load any hidden arguments and
8318 * perform the call. All argument variables are freed.
8319 *
8320 * @returns New code buffer offset; throws VBox status code on error.
8321 * @param pReNative The native recompile state.
8322 * @param off The code buffer offset.
8323 * @param cArgs The total number of arguments (includes hidden
8324 * count).
8325 * @param cHiddenArgs The number of hidden arguments. The hidden
8326 * arguments must not have any variable declared for
8327 * them, whereas all the regular arguments must
8328 * (tstIEMCheckMc ensures this).
8329 */
8330DECL_HIDDEN_THROW(uint32_t)
8331iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
8332{
8333#ifdef VBOX_STRICT
8334 /*
8335 * Assert sanity.
8336 */
8337 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8338 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8339 for (unsigned i = 0; i < cHiddenArgs; i++)
8340 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8341 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8342 {
8343 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8344 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8345 }
8346 iemNativeRegAssertSanity(pReNative);
8347#endif
8348
8349 /* We don't know what the called function makes use of, so flush any pending register writes. */
8350 off = iemNativeRegFlushPendingWrites(pReNative, off);
8351
8352 /*
8353 * Before we do anything else, go over variables that are referenced and
8354 * make sure they are not in a register.
8355 */
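    /* A variable that is referenced (by a VarRef argument) must live in its
       stack slot so the reference can be a stable frame address; a cached host
       register copy would be stale or clobbered once the call is made. */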
8356 uint32_t bmVars = pReNative->Core.bmVars;
8357 if (bmVars)
8358 {
8359 do
8360 {
8361 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8362 bmVars &= ~RT_BIT_32(idxVar);
8363
8364 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8365 {
8366 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8367#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8368 if ( idxRegOld != UINT8_MAX
8369 && pReNative->Core.aVars[idxVar].fSimdReg)
8370 {
8371 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8372 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8373
8374 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8375 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8376 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8377 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8378 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8379 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8380 else
8381 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8382
8383 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8384 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8385
8386 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8387 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8388 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8389 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8390 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8391 }
8392 else
8393#endif
8394 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8395 {
8396 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8397 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8398 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8399 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8400 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8401
8402 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8403 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8404 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8405 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8406 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8407 }
8408 }
8409 } while (bmVars != 0);
8410#if 0 //def VBOX_STRICT
8411 iemNativeRegAssertSanity(pReNative);
8412#endif
8413 }
8414
8415 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8416
8417 /*
8418 * First, go over the host registers that will be used for arguments and make
8419 * sure they either hold the desired argument or are free.
8420 */
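    /* Note: g_aidxIemNativeCallRegs maps argument numbers to the host calling
       convention registers (typically x0..x7 on ARM64 and RCX/RDX/R8/R9 or
       RDI/RSI/RDX/RCX/R8/R9 on AMD64, depending on the host ABI). */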
8421 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8422 {
8423 for (uint32_t i = 0; i < cRegArgs; i++)
8424 {
8425 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8426 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8427 {
8428 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8429 {
8430 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8431 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8432 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8433 Assert(pVar->idxReg == idxArgReg);
8434 uint8_t const uArgNo = pVar->uArgNo;
8435 if (uArgNo == i)
8436 { /* perfect */ }
8437 /* The variable allocator logic should make sure this is impossible,
8438 except for when the return register is used as a parameter (ARM,
8439 but not x86). */
8440#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8441 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8442 {
8443# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8444# error "Implement this"
8445# endif
8446 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8447 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8448 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8449 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8450 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8451 }
8452#endif
8453 else
8454 {
8455 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8456
8457 if (pVar->enmKind == kIemNativeVarKind_Stack)
8458 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8459 else
8460 {
8461 /* just free it, can be reloaded if used again */
8462 pVar->idxReg = UINT8_MAX;
8463 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8464 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8465 }
8466 }
8467 }
8468 else
8469 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8470 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8471 }
8472 }
8473#if 0 //def VBOX_STRICT
8474 iemNativeRegAssertSanity(pReNative);
8475#endif
8476 }
8477
8478 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8479
8480#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8481 /*
8482 * If there are any stack arguments, make sure they are in their place as well.
8483 *
8484 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8485 * the caller) will be loading it later and it must be free (see the first loop).
8486 */
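    /* Illustrative example: with more than IEMNATIVE_CALL_ARG_GREG_COUNT
       arguments, an immediate fifth argument is simply written to its stack
       slot (conceptually 'mov qword [xBP+offBpDisp], uValue') via
       iemNativeEmitStoreImm64ByBp() below; 'fifth' is just an example here. */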
8487 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8488 {
8489 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8490 {
8491 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8492 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8493 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8494 {
8495 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8496 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8497 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8498 pVar->idxReg = UINT8_MAX;
8499 }
8500 else
8501 {
8502 /* Use ARG0 as temp for stuff we need registers for. */
8503 switch (pVar->enmKind)
8504 {
8505 case kIemNativeVarKind_Stack:
8506 {
8507 uint8_t const idxStackSlot = pVar->idxStackSlot;
8508 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8509 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8510 iemNativeStackCalcBpDisp(idxStackSlot));
8511 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8512 continue;
8513 }
8514
8515 case kIemNativeVarKind_Immediate:
8516 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8517 continue;
8518
8519 case kIemNativeVarKind_VarRef:
8520 {
8521 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8522 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8523 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8524 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8525 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8526# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8527 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8528 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8529 if ( fSimdReg
8530 && idxRegOther != UINT8_MAX)
8531 {
8532 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8533 if (cbVar == sizeof(RTUINT128U))
8534 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8535 else
8536 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8537 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8538 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8539 }
8540 else
8541# endif
8542 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8543 {
8544 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8545 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8546 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8547 }
8548 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8549 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8550 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8551 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8552 continue;
8553 }
8554
8555 case kIemNativeVarKind_GstRegRef:
8556 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8557 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8558 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8559 continue;
8560
8561 case kIemNativeVarKind_Invalid:
8562 case kIemNativeVarKind_End:
8563 break;
8564 }
8565 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8566 }
8567 }
8568# if 0 //def VBOX_STRICT
8569 iemNativeRegAssertSanity(pReNative);
8570# endif
8571 }
8572#else
8573 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8574#endif
8575
8576 /*
8577 * Make sure the argument variables are loaded into their respective registers.
8578 *
8579 * We can optimize this by ASSUMING that any register allocations are for
8580 * registers that have already been loaded and are ready. The previous step
8581 * saw to that.
8582 */
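    /* The test below is non-zero only if at least one non-hidden argument
       register is still unallocated, i.e. some argument still needs loading. */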
8583 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8584 {
8585 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8586 {
8587 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8588 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8589 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8590 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8591 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8592 else
8593 {
8594 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8595 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8596 {
8597 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8598 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8599 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8600 | RT_BIT_32(idxArgReg);
8601 pVar->idxReg = idxArgReg;
8602 }
8603 else
8604 {
8605 /* Use ARG0 as temp for stuff we need registers for. */
8606 switch (pVar->enmKind)
8607 {
8608 case kIemNativeVarKind_Stack:
8609 {
8610 uint8_t const idxStackSlot = pVar->idxStackSlot;
8611 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8612 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8613 continue;
8614 }
8615
8616 case kIemNativeVarKind_Immediate:
8617 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8618 continue;
8619
8620 case kIemNativeVarKind_VarRef:
8621 {
8622 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8623 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8624 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8625 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8626 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8627 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8628#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8629 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8630 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8631 if ( fSimdReg
8632 && idxRegOther != UINT8_MAX)
8633 {
8634 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8635 if (cbVar == sizeof(RTUINT128U))
8636 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8637 else
8638 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8639 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8640 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8641 }
8642 else
8643#endif
8644 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8645 {
8646 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8647 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8648 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8649 }
8650 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8651 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8652 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8653 continue;
8654 }
8655
8656 case kIemNativeVarKind_GstRegRef:
8657 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8658 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8659 continue;
8660
8661 case kIemNativeVarKind_Invalid:
8662 case kIemNativeVarKind_End:
8663 break;
8664 }
8665 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8666 }
8667 }
8668 }
8669#if 0 //def VBOX_STRICT
8670 iemNativeRegAssertSanity(pReNative);
8671#endif
8672 }
8673#ifdef VBOX_STRICT
8674 else
8675 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8676 {
8677 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8678 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8679 }
8680#endif
8681
8682 /*
8683 * Free all argument variables (simplified).
8684 * Their lifetime always expires with the call they are for.
8685 */
8686 /** @todo Make the python script check that arguments aren't used after
8687 * IEM_MC_CALL_XXXX. */
8688 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8689 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8690 * an argument value. There is also some FPU stuff. */
8691 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8692 {
8693 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8694 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8695
8696 /* no need to free registers: */
8697 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8698 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8699 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8700 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8701 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8702 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8703
8704 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8705 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8706 iemNativeVarFreeStackSlots(pReNative, idxVar);
8707 }
8708 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8709
8710 /*
8711 * Flush volatile registers as we make the call.
8712 */
8713 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8714
8715 return off;
8716}
8717
8718
8719
8720/*********************************************************************************************************************************
8721* TLB Lookup. *
8722*********************************************************************************************************************************/
8723
8724/**
8725 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8726 */
8727DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
8728{
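    /* uSegAndSizeAndAccess layout: byte 0 = segment register index (UINT8_MAX
       for a flat address), byte 1 = access size in bytes, bits 16..31 =
       IEM_ACCESS_XXX flags. */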
8729 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
8730 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
8731 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
8732 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
8733
8734 /* Do the lookup manually. */
8735 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
8736 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
8737 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
8738 if (RT_LIKELY(pTlbe->uTag == uTag))
8739 {
8740 /*
8741 * Check TLB page table level access flags.
8742 */
8743 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
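    /* With IEMTLBE_F_PT_NO_USER being bit 2 (value 4), (CPL + 1) & 4 yields 4
       for CPL 3 and 0 for CPL 0..2, so only user-mode accesses test NO_USER. */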
8744 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8745 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8746 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8747 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8748 | IEMTLBE_F_PG_UNASSIGNED
8749 | IEMTLBE_F_PT_NO_ACCESSED
8750 | fNoWriteNoDirty | fNoUser);
8751 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8752 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8753 {
8754 /*
8755 * Return the address.
8756 */
8757 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8758 if ((uintptr_t)pbAddr == uResult)
8759 return;
8760 RT_NOREF(cbMem);
8761 AssertFailed();
8762 }
8763 else
8764 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8765 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8766 }
8767 else
8768 AssertFailed();
8769 RT_BREAKPOINT();
8770}
8771
8772/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8773
8774
8775
8776/*********************************************************************************************************************************
8777* Recompiler Core. *
8778*********************************************************************************************************************************/
8779
8780/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8781static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8782{
8783 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8784 pDis->cbCachedInstr += cbMaxRead;
8785 RT_NOREF(cbMinRead);
8786 return VERR_NO_DATA;
8787}
8788
8789
8790DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8791{
8792 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8793 {
8794#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8795 ENTRY(fLocalForcedActions),
8796 ENTRY(iem.s.rcPassUp),
8797 ENTRY(iem.s.fExec),
8798 ENTRY(iem.s.pbInstrBuf),
8799 ENTRY(iem.s.uInstrBufPc),
8800 ENTRY(iem.s.GCPhysInstrBuf),
8801 ENTRY(iem.s.cbInstrBufTotal),
8802 ENTRY(iem.s.idxTbCurInstr),
8803#ifdef VBOX_WITH_STATISTICS
8804 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8805 ENTRY(iem.s.StatNativeTlbHitsForStore),
8806 ENTRY(iem.s.StatNativeTlbHitsForStack),
8807 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8808 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8809 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8810 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8811 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8812#endif
8813 ENTRY(iem.s.DataTlb.aEntries),
8814 ENTRY(iem.s.DataTlb.uTlbRevision),
8815 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8816 ENTRY(iem.s.DataTlb.cTlbHits),
8817 ENTRY(iem.s.CodeTlb.aEntries),
8818 ENTRY(iem.s.CodeTlb.uTlbRevision),
8819 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8820 ENTRY(iem.s.CodeTlb.cTlbHits),
8821 ENTRY(pVMR3),
8822 ENTRY(cpum.GstCtx.rax),
8823 ENTRY(cpum.GstCtx.ah),
8824 ENTRY(cpum.GstCtx.rcx),
8825 ENTRY(cpum.GstCtx.ch),
8826 ENTRY(cpum.GstCtx.rdx),
8827 ENTRY(cpum.GstCtx.dh),
8828 ENTRY(cpum.GstCtx.rbx),
8829 ENTRY(cpum.GstCtx.bh),
8830 ENTRY(cpum.GstCtx.rsp),
8831 ENTRY(cpum.GstCtx.rbp),
8832 ENTRY(cpum.GstCtx.rsi),
8833 ENTRY(cpum.GstCtx.rdi),
8834 ENTRY(cpum.GstCtx.r8),
8835 ENTRY(cpum.GstCtx.r9),
8836 ENTRY(cpum.GstCtx.r10),
8837 ENTRY(cpum.GstCtx.r11),
8838 ENTRY(cpum.GstCtx.r12),
8839 ENTRY(cpum.GstCtx.r13),
8840 ENTRY(cpum.GstCtx.r14),
8841 ENTRY(cpum.GstCtx.r15),
8842 ENTRY(cpum.GstCtx.es.Sel),
8843 ENTRY(cpum.GstCtx.es.u64Base),
8844 ENTRY(cpum.GstCtx.es.u32Limit),
8845 ENTRY(cpum.GstCtx.es.Attr),
8846 ENTRY(cpum.GstCtx.cs.Sel),
8847 ENTRY(cpum.GstCtx.cs.u64Base),
8848 ENTRY(cpum.GstCtx.cs.u32Limit),
8849 ENTRY(cpum.GstCtx.cs.Attr),
8850 ENTRY(cpum.GstCtx.ss.Sel),
8851 ENTRY(cpum.GstCtx.ss.u64Base),
8852 ENTRY(cpum.GstCtx.ss.u32Limit),
8853 ENTRY(cpum.GstCtx.ss.Attr),
8854 ENTRY(cpum.GstCtx.ds.Sel),
8855 ENTRY(cpum.GstCtx.ds.u64Base),
8856 ENTRY(cpum.GstCtx.ds.u32Limit),
8857 ENTRY(cpum.GstCtx.ds.Attr),
8858 ENTRY(cpum.GstCtx.fs.Sel),
8859 ENTRY(cpum.GstCtx.fs.u64Base),
8860 ENTRY(cpum.GstCtx.fs.u32Limit),
8861 ENTRY(cpum.GstCtx.fs.Attr),
8862 ENTRY(cpum.GstCtx.gs.Sel),
8863 ENTRY(cpum.GstCtx.gs.u64Base),
8864 ENTRY(cpum.GstCtx.gs.u32Limit),
8865 ENTRY(cpum.GstCtx.gs.Attr),
8866 ENTRY(cpum.GstCtx.rip),
8867 ENTRY(cpum.GstCtx.eflags),
8868 ENTRY(cpum.GstCtx.uRipInhibitInt),
8869 ENTRY(cpum.GstCtx.cr0),
8870 ENTRY(cpum.GstCtx.cr4),
8871 ENTRY(cpum.GstCtx.aXcr[0]),
8872 ENTRY(cpum.GstCtx.aXcr[1]),
8873#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8874 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8875 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8876 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8877 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8878 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8879 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8880 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8881 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8882 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8883 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8884 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8885 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8886 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8887 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8888 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8889 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8890 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8891 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8892 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8893 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8894 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8895 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8896 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8897 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8898 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8899 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8900 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8901 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8902 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8903 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8904 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8905 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8906#endif
8907#undef ENTRY
8908 };
8909#ifdef VBOX_STRICT
8910 static bool s_fOrderChecked = false;
8911 if (!s_fOrderChecked)
8912 {
8913 s_fOrderChecked = true;
8914 uint32_t offPrev = s_aMembers[0].off;
8915 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8916 {
8917 Assert(s_aMembers[i].off > offPrev);
8918 offPrev = s_aMembers[i].off;
8919 }
8920 }
8921#endif
8922
8923 /*
8924 * Binary lookup.
8925 */
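    /* Exact-match binary search over the ascending member offsets (the order is
       verified by the VBOX_STRICT block above); falls through to the range check
       and NULL below when the offset doesn't start a known member. */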
8926 unsigned iStart = 0;
8927 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8928 for (;;)
8929 {
8930 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8931 uint32_t const offCur = s_aMembers[iCur].off;
8932 if (off < offCur)
8933 {
8934 if (iCur != iStart)
8935 iEnd = iCur;
8936 else
8937 break;
8938 }
8939 else if (off > offCur)
8940 {
8941 if (iCur + 1 < iEnd)
8942 iStart = iCur + 1;
8943 else
8944 break;
8945 }
8946 else
8947 return s_aMembers[iCur].pszName;
8948 }
8949#ifdef VBOX_WITH_STATISTICS
8950 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8951 return "iem.s.acThreadedFuncStats[iFn]";
8952#endif
8953 return NULL;
8954}
8955
8956
8957DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
8958{
8959 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
8960#if defined(RT_ARCH_AMD64)
8961 static const char * const a_apszMarkers[] =
8962 {
8963 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
8964 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
8965 };
8966#endif
8967
8968 char szDisBuf[512];
8969 DISSTATE Dis;
8970 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
8971 uint32_t const cNative = pTb->Native.cInstructions;
8972 uint32_t offNative = 0;
8973#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8974 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
8975#endif
8976 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
8977 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
8978 : DISCPUMODE_64BIT;
8979#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8980 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
8981#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8982 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
8983#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
8984# error "Port me"
8985#else
8986 csh hDisasm = ~(size_t)0;
8987# if defined(RT_ARCH_AMD64)
8988 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
8989# elif defined(RT_ARCH_ARM64)
8990 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
8991# else
8992# error "Port me"
8993# endif
8994 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
8995
8996 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
8997 //Assert(rcCs == CS_ERR_OK);
8998#endif
8999
9000 /*
9001 * Print TB info.
9002 */
9003 pHlp->pfnPrintf(pHlp,
9004 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9005 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9006 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9007 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9008#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9009 if (pDbgInfo && pDbgInfo->cEntries > 1)
9010 {
9011 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9012
9013 /*
9014 * This disassembly is driven by the debug info which follows the native
9015 * code and indicates where the code for the next guest instruction starts,
9016 * where labels are placed, and similar details.
9017 */
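    /* The debug entries are consumed in lock-step with the native code: each
       kIemTbDbgEntryType_NativeOffset entry gives the native offset at which
       the subsequent guest-instruction, label and shadowing entries apply. */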
9018 uint32_t idxThreadedCall = 0;
9019 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9020 uint8_t idxRange = UINT8_MAX;
9021 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9022 uint32_t offRange = 0;
9023 uint32_t offOpcodes = 0;
9024 uint32_t const cbOpcodes = pTb->cbOpcodes;
9025 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9026 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9027 uint32_t iDbgEntry = 1;
9028 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9029
9030 while (offNative < cNative)
9031 {
9032 /* If we're at or have passed the point where the next chunk of debug
9033 info starts, process it. */
9034 if (offDbgNativeNext <= offNative)
9035 {
9036 offDbgNativeNext = UINT32_MAX;
9037 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9038 {
9039 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9040 {
9041 case kIemTbDbgEntryType_GuestInstruction:
9042 {
9043 /* Did the exec flag change? */
9044 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9045 {
9046 pHlp->pfnPrintf(pHlp,
9047 " fExec change %#08x -> %#08x %s\n",
9048 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9049 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9050 szDisBuf, sizeof(szDisBuf)));
9051 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9052 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9053 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9054 : DISCPUMODE_64BIT;
9055 }
9056
9057 /* New opcode range? We need to cope with a spurious debug info entry here for cases
9058 where the compilation was aborted before the opcode was recorded and the actual
9059 instruction was translated to a threaded call. This may happen when we run out
9060 of ranges, or when some complicated interrupts/FFs are found to be pending or
9061 similar. So, we just deal with it here rather than in the compiler code as it
9062 is a lot simpler to do here. */
9063 if ( idxRange == UINT8_MAX
9064 || idxRange >= cRanges
9065 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9066 {
9067 idxRange += 1;
9068 if (idxRange < cRanges)
9069 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9070 else
9071 continue;
9072 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9073 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9074 + (pTb->aRanges[idxRange].idxPhysPage == 0
9075 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9076 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9077 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9078 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9079 pTb->aRanges[idxRange].idxPhysPage);
9080 GCPhysPc += offRange;
9081 }
9082
9083 /* Disassemble the instruction. */
9084 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9085 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9086 uint32_t cbInstr = 1;
9087 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9088 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9089 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9090 if (RT_SUCCESS(rc))
9091 {
9092 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9093 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9094 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9095 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9096
9097 static unsigned const s_offMarker = 55;
9098 static char const s_szMarker[] = " ; <--- guest";
9099 if (cch < s_offMarker)
9100 {
9101 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9102 cch = s_offMarker;
9103 }
9104 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9105 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9106
9107 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9108 }
9109 else
9110 {
9111 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9112 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9113 cbInstr = 1;
9114 }
9115 GCPhysPc += cbInstr;
9116 offOpcodes += cbInstr;
9117 offRange += cbInstr;
9118 continue;
9119 }
9120
9121 case kIemTbDbgEntryType_ThreadedCall:
9122 pHlp->pfnPrintf(pHlp,
9123 " Call #%u to %s (%u args) - %s\n",
9124 idxThreadedCall,
9125 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9126 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9127 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9128 idxThreadedCall++;
9129 continue;
9130
9131 case kIemTbDbgEntryType_GuestRegShadowing:
9132 {
9133 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9134 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9135 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9136 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9137 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9138 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9139 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
9140 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9141 else
9142 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9143 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9144 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9145 continue;
9146 }
9147
9148#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9149 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9150 {
9151 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9152 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9153 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9154 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9155 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9156 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9157 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9158 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9159 else
9160 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9161 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9162 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9163 continue;
9164 }
9165#endif
9166
9167 case kIemTbDbgEntryType_Label:
9168 {
9169 const char *pszName = "what_the_fudge";
9170 const char *pszComment = "";
9171 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9172 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9173 {
9174 case kIemNativeLabelType_Return: pszName = "Return"; break;
9175 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9176 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9177 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9178 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9179 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9180 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9181 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9182 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9183 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9184 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9185 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9186 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9187 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9188 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9189 case kIemNativeLabelType_If:
9190 pszName = "If";
9191 fNumbered = true;
9192 break;
9193 case kIemNativeLabelType_Else:
9194 pszName = "Else";
9195 fNumbered = true;
9196 pszComment = " ; regs state restored pre-if-block";
9197 break;
9198 case kIemNativeLabelType_Endif:
9199 pszName = "Endif";
9200 fNumbered = true;
9201 break;
9202 case kIemNativeLabelType_CheckIrq:
9203 pszName = "CheckIrq_CheckVM";
9204 fNumbered = true;
9205 break;
9206 case kIemNativeLabelType_TlbLookup:
9207 pszName = "TlbLookup";
9208 fNumbered = true;
9209 break;
9210 case kIemNativeLabelType_TlbMiss:
9211 pszName = "TlbMiss";
9212 fNumbered = true;
9213 break;
9214 case kIemNativeLabelType_TlbDone:
9215 pszName = "TlbDone";
9216 fNumbered = true;
9217 break;
9218 case kIemNativeLabelType_Invalid:
9219 case kIemNativeLabelType_End:
9220 break;
9221 }
9222 if (fNumbered)
9223 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9224 else
9225 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9226 continue;
9227 }
9228
9229 case kIemTbDbgEntryType_NativeOffset:
9230 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9231 Assert(offDbgNativeNext > offNative);
9232 break;
9233
9234#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9235 case kIemTbDbgEntryType_DelayedPcUpdate:
9236 pHlp->pfnPrintf(pHlp,
9237 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9238 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9239 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9240 continue;
9241#endif
9242
9243 default:
9244 AssertFailed();
9245 }
9246 iDbgEntry++;
9247 break;
9248 }
9249 }
9250
9251 /*
9252 * Disassemble the next native instruction.
9253 */
9254 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9255# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9256 uint32_t cbInstr = sizeof(paNative[0]);
9257 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9258 if (RT_SUCCESS(rc))
9259 {
9260# if defined(RT_ARCH_AMD64)
9261 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9262 {
9263 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9264 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9265 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9266 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9267 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9268 uInfo & 0x8000 ? "recompiled" : "todo");
9269 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9270 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9271 else
9272 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9273 }
9274 else
9275# endif
9276 {
9277 const char *pszAnnotation = NULL;
9278# ifdef RT_ARCH_AMD64
9279 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9280 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9281 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9282 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9283 PCDISOPPARAM pMemOp;
9284 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
9285 pMemOp = &Dis.Param1;
9286 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
9287 pMemOp = &Dis.Param2;
9288 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
9289 pMemOp = &Dis.Param3;
9290 else
9291 pMemOp = NULL;
9292 if ( pMemOp
9293 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9294 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9295 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9296 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9297
9298#elif defined(RT_ARCH_ARM64)
9299 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9300 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9301 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9302# else
9303# error "Port me"
9304# endif
9305 if (pszAnnotation)
9306 {
9307 static unsigned const s_offAnnotation = 55;
9308 size_t const cchAnnotation = strlen(pszAnnotation);
9309 size_t cchDis = strlen(szDisBuf);
9310 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9311 {
9312 if (cchDis < s_offAnnotation)
9313 {
9314 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9315 cchDis = s_offAnnotation;
9316 }
9317 szDisBuf[cchDis++] = ' ';
9318 szDisBuf[cchDis++] = ';';
9319 szDisBuf[cchDis++] = ' ';
9320 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9321 }
9322 }
9323 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9324 }
9325 }
9326 else
9327 {
9328# if defined(RT_ARCH_AMD64)
9329 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9330 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9331# elif defined(RT_ARCH_ARM64)
9332 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9333# else
9334# error "Port me"
9335# endif
9336 cbInstr = sizeof(paNative[0]);
9337 }
9338 offNative += cbInstr / sizeof(paNative[0]);
9339
9340# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9341 cs_insn *pInstr;
9342 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9343 (uintptr_t)pNativeCur, 1, &pInstr);
9344 if (cInstrs > 0)
9345 {
9346 Assert(cInstrs == 1);
9347 const char *pszAnnotation = NULL;
9348# if defined(RT_ARCH_ARM64)
9349 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9350 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9351 {
9352 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9353 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9354 char *psz = strchr(pInstr->op_str, '[');
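    /* The operand string typically looks like "x9, [x28, #256]" (the displacement
       shown is purely illustrative); we only care about an x27/x28 base register
       and the unsigned '#' displacement that may follow it. */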
9355 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9356 {
9357 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9358 int32_t off = -1;
9359 psz += 4;
9360 if (*psz == ']')
9361 off = 0;
9362 else if (*psz == ',')
9363 {
9364 psz = RTStrStripL(psz + 1);
9365 if (*psz == '#')
9366 off = RTStrToInt32(&psz[1]);
9367 /** @todo deal with index registers and LSL as well... */
9368 }
9369 if (off >= 0)
9370 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9371 }
9372 }
9373# endif
9374
9375 size_t const cchOp = strlen(pInstr->op_str);
9376# if defined(RT_ARCH_AMD64)
9377 if (pszAnnotation)
9378 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9379 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9380 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9381 else
9382 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9383 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9384
9385# else
9386 if (pszAnnotation)
9387 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9388 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9389 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9390 else
9391 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9392 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9393# endif
9394 offNative += pInstr->size / sizeof(*pNativeCur);
9395 cs_free(pInstr, cInstrs);
9396 }
9397 else
9398 {
9399# if defined(RT_ARCH_AMD64)
9400 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9401 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9402# else
9403 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9404# endif
9405 offNative++;
9406 }
9407# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9408 }
9409 }
9410 else
9411#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9412 {
9413 /*
9414 * No debug info, just disassemble the x86 code and then the native code.
9415 *
9416 * First the guest code:
9417 */
9418 for (unsigned i = 0; i < pTb->cRanges; i++)
9419 {
9420 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9421 + (pTb->aRanges[i].idxPhysPage == 0
9422 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9423 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9424 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9425 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9426 unsigned off = pTb->aRanges[i].offOpcodes;
9427 /** @todo this ain't working when crossing pages! */
9428 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9429 while (off < cbOpcodes)
9430 {
9431 uint32_t cbInstr = 1;
9432 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9433 &pTb->pabOpcodes[off], cbOpcodes - off,
9434 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9435 if (RT_SUCCESS(rc))
9436 {
9437 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9438 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9439 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9440 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9441 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
9442 GCPhysPc += cbInstr;
9443 off += cbInstr;
9444 }
9445 else
9446 {
9447 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
9448 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9449 break;
9450 }
9451 }
9452 }
9453
9454 /*
9455 * Then the native code:
9456 */
9457 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9458 while (offNative < cNative)
9459 {
9460 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9461# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9462 uint32_t cbInstr = sizeof(paNative[0]);
9463 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9464 if (RT_SUCCESS(rc))
9465 {
9466# if defined(RT_ARCH_AMD64)
9467 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9468 {
9469 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9470 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9471 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9472 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9473 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9474 uInfo & 0x8000 ? "recompiled" : "todo");
9475 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9476 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9477 else
9478 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9479 }
9480 else
9481# endif
9482 {
9483# ifdef RT_ARCH_AMD64
9484 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9485 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9486 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9487 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9488# elif defined(RT_ARCH_ARM64)
9489 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9490 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9491 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9492# else
9493# error "Port me"
9494# endif
9495 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9496 }
9497 }
9498 else
9499 {
9500# if defined(RT_ARCH_AMD64)
9501 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9502 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9503# else
9504 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9505# endif
9506 cbInstr = sizeof(paNative[0]);
9507 }
9508 offNative += cbInstr / sizeof(paNative[0]);
9509
9510# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9511 cs_insn *pInstr;
9512 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9513 (uintptr_t)pNativeCur, 1, &pInstr);
9514 if (cInstrs > 0)
9515 {
9516 Assert(cInstrs == 1);
9517# if defined(RT_ARCH_AMD64)
9518 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9519 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9520# else
9521 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9522 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9523# endif
9524 offNative += pInstr->size / sizeof(*pNativeCur);
9525 cs_free(pInstr, cInstrs);
9526 }
9527 else
9528 {
9529# if defined(RT_ARCH_AMD64)
9530 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9531 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9532# else
9533 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9534# endif
9535 offNative++;
9536 }
9537# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9538 }
9539 }
9540
9541#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9542 /* Cleanup. */
9543 cs_close(&hDisasm);
9544#endif
9545}
9546
9547
9548/**
9549 * Recompiles the given threaded TB into a native one.
9550 *
9551 * In case of failure the translation block will be returned as-is.
9552 *
9553 * @returns pTb.
9554 * @param pVCpu The cross context virtual CPU structure of the calling
9555 * thread.
9556 * @param pTb The threaded translation to recompile to native.
9557 */
9558DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9559{
9560 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9561
9562 /*
9563 * The first time through, we allocate the recompiler state; the other times
9564 * we just need to reset it before using it again.
9565 */
9566 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9567 if (RT_LIKELY(pReNative))
9568 iemNativeReInit(pReNative, pTb);
9569 else
9570 {
9571 pReNative = iemNativeInit(pVCpu, pTb);
9572 AssertReturn(pReNative, pTb);
9573 }
9574
9575#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9576 /*
9577 * First do liveness analysis. This is done backwards.
9578 */
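    /* Going backwards lets each entry describe what the code *after* a call
       still needs, so the forward recompilation pass below knows which guest
       registers and flags are dead once a given call has been emitted. */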
9579 {
9580 uint32_t idxCall = pTb->Thrd.cCalls;
9581 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9582 { /* likely */ }
9583 else
9584 {
9585 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9586 while (idxCall > cAlloc)
9587 cAlloc *= 2;
9588 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9589 AssertReturn(pvNew, pTb);
9590 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9591 pReNative->cLivenessEntriesAlloc = cAlloc;
9592 }
9593 AssertReturn(idxCall > 0, pTb);
9594 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9595
9596 /* The initial (final) entry. */
9597 idxCall--;
9598 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
9599
9600 /* Loop backwards through the calls and fill in the other entries. */
9601 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
9602 while (idxCall > 0)
9603 {
9604 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
9605 if (pfnLiveness)
9606 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
9607 else
9608 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
9609 pCallEntry--;
9610 idxCall--;
9611 }
9612
9613# ifdef VBOX_WITH_STATISTICS
9614 /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
9615 to 'clobbered' rather than 'input'. */
9616 /** @todo */
9617# endif
9618 }
9619#endif
9620
9621 /*
9622 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
9623 * for aborting if an error happens.
9624 */
9625 uint32_t cCallsLeft = pTb->Thrd.cCalls;
9626#ifdef LOG_ENABLED
9627 uint32_t const cCallsOrg = cCallsLeft;
9628#endif
9629 uint32_t off = 0;
9630 int rc = VINF_SUCCESS;
9631 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9632 {
9633 /*
9634 * Emit prolog code (fixed).
9635 */
9636 off = iemNativeEmitProlog(pReNative, off);
9637
9638 /*
9639 * Convert the calls to native code.
9640 */
9641#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9642 int32_t iGstInstr = -1;
9643#endif
9644#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
9645 uint32_t cThreadedCalls = 0;
9646 uint32_t cRecompiledCalls = 0;
9647#endif
9648#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9649 uint32_t idxCurCall = 0;
9650#endif
9651 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
9652 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
9653 while (cCallsLeft-- > 0)
9654 {
9655 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
9656#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9657 pReNative->idxCurCall = idxCurCall;
9658#endif
9659
9660 /*
9661 * Debug info, assembly markup and statistics.
9662 */
9663#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
9664 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
9665 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
9666#endif
9667#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9668 iemNativeDbgInfoAddNativeOffset(pReNative, off);
9669 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
9670 {
9671 if (iGstInstr < (int32_t)pTb->cInstructions)
9672 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
9673 else
9674 Assert(iGstInstr == pTb->cInstructions);
9675 iGstInstr = pCallEntry->idxInstr;
9676 }
9677 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
9678#endif
9679#if defined(VBOX_STRICT)
9680 off = iemNativeEmitMarker(pReNative, off,
9681 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
9682#endif
9683#if defined(VBOX_STRICT)
9684 iemNativeRegAssertSanity(pReNative);
9685#endif
9686#ifdef VBOX_WITH_STATISTICS
9687 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
9688#endif
9689
9690 /*
9691 * Actual work.
9692 */
9693 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
9694 pfnRecom ? "(recompiled)" : "(todo)"));
9695 if (pfnRecom) /** @todo stats on this. */
9696 {
9697 off = pfnRecom(pReNative, off, pCallEntry);
9698 STAM_REL_STATS({cRecompiledCalls++;});
9699 }
9700 else
9701 {
9702 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
9703 STAM_REL_STATS({cThreadedCalls++;});
9704 }
9705 Assert(off <= pReNative->cInstrBufAlloc);
9706 Assert(pReNative->cCondDepth == 0);
9707
9708#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
9709 if (LogIs2Enabled())
9710 {
9711 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
9712# ifndef IEMLIVENESS_EXTENDED_LAYOUT
9713 static const char s_achState[] = "CUXI";
9714# else
9715 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
9716# endif
9717
9718 char szGpr[17];
9719 for (unsigned i = 0; i < 16; i++)
9720 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
9721 szGpr[16] = '\0';
9722
9723 char szSegBase[X86_SREG_COUNT + 1];
9724 char szSegLimit[X86_SREG_COUNT + 1];
9725 char szSegAttrib[X86_SREG_COUNT + 1];
9726 char szSegSel[X86_SREG_COUNT + 1];
9727 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
9728 {
9729 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
9730 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
9731 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
9732 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
9733 }
9734 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
9735 = szSegSel[X86_SREG_COUNT] = '\0';
9736
9737 char szEFlags[8];
9738 for (unsigned i = 0; i < 7; i++)
9739 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
9740 szEFlags[7] = '\0';
9741
9742 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
9743 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
9744 }
9745#endif
9746
9747 /*
9748 * Advance.
9749 */
9750 pCallEntry++;
9751#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
9752 idxCurCall++;
9753#endif
9754 }
9755
9756 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
9757 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
9758 if (!cThreadedCalls)
9759 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
9760
9761 /*
9762 * Emit the epilog code.
9763 */
9764 uint32_t idxReturnLabel;
9765 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
9766
9767 /*
9768 * Generate special jump labels.
9769 */
9770 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
9771 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
9772 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
9773 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
9774
9775 /*
9776     * Generate simple TB tail labels that just call a helper with a pVCpu
9777     * arg and either return or longjmp/throw a non-zero status.
9778 *
9779 * The array entries must be ordered by enmLabel value so we can index
9780 * using fTailLabels bit numbers.
9781 */
9782 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9783 static struct
9784 {
9785 IEMNATIVELABELTYPE enmLabel;
9786 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9787 } const g_aSimpleTailLabels[] =
9788 {
9789 { kIemNativeLabelType_Invalid, NULL },
9790 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
9791 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
9792 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
9793 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
9794 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
9795 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
9796 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
9797 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
9798 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
9799 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
9800 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
9801 };
9802 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
9803 AssertCompile(kIemNativeLabelType_Invalid == 0);
9804 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
9805 if (fTailLabels)
9806 {
9807 do
9808 {
9809 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
9810 fTailLabels &= ~RT_BIT_64(enmLabel);
9811 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
9812
9813 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
9814 Assert(idxLabel != UINT32_MAX);
9815 if (idxLabel != UINT32_MAX)
9816 {
9817 iemNativeLabelDefine(pReNative, idxLabel, off);
9818
9819 /* int pfnCallback(PVMCPUCC pVCpu) */
9820 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9821 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
9822
9823 /* jump back to the return sequence. */
9824 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
9825 }
9826
9827 } while (fTailLabels);
9828 }
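    /*
     * Worked example (illustrative only, not generated output): if this TB only
     * used the RaiseGp0 and ObsoleteTb labels, bmLabelTypes would have those two
     * bits set, fTailLabels would keep just that simple-tail subset, and the loop
     * above would pop them in ascending enmLabel order via ASMBitFirstSetU64,
     * emitting for each label roughly:
     *      (label:)  mov   ARG0, pVCpu     ; IEMNATIVE_CALL_ARG0_GREG <- IEMNATIVE_REG_FIXED_PVMCPU
     *                call  g_aSimpleTailLabels[enmLabel].pfnCallback
     *                jmp   idxReturnLabel  ; back to the common return sequence
     */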
9829 }
9830 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9831 {
9832 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
9833 return pTb;
9834 }
9835 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9836 Assert(off <= pReNative->cInstrBufAlloc);
9837
9838 /*
9839     * Make sure all labels have been defined.
9840 */
9841 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
9842#ifdef VBOX_STRICT
9843 uint32_t const cLabels = pReNative->cLabels;
9844 for (uint32_t i = 0; i < cLabels; i++)
9845 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
9846#endif
9847
9848 /*
9849 * Allocate executable memory, copy over the code we've generated.
9850 */
9851 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
9852 if (pTbAllocator->pDelayedFreeHead)
9853 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
9854
9855 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
9856 AssertReturn(paFinalInstrBuf, pTb);
9857 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
9858
9859 /*
9860 * Apply fixups.
9861 */
9862 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
9863 uint32_t const cFixups = pReNative->cFixups;
9864 for (uint32_t i = 0; i < cFixups; i++)
9865 {
9866 Assert(paFixups[i].off < off);
9867 Assert(paFixups[i].idxLabel < cLabels);
9868 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
9869 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
9870 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
9871 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
9872 switch (paFixups[i].enmType)
9873 {
9874#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
9875 case kIemNativeFixupType_Rel32:
9876 Assert(paFixups[i].off + 4 <= off);
9877 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9878 continue;
9879
9880#elif defined(RT_ARCH_ARM64)
9881 case kIemNativeFixupType_RelImm26At0:
9882 {
9883 Assert(paFixups[i].off < off);
9884 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9885 Assert(offDisp >= -262144 && offDisp < 262144);
9886 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
9887 continue;
9888 }
9889
9890 case kIemNativeFixupType_RelImm19At5:
9891 {
9892 Assert(paFixups[i].off < off);
9893 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9894 Assert(offDisp >= -262144 && offDisp < 262144);
9895 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
9896 continue;
9897 }
9898
9899 case kIemNativeFixupType_RelImm14At5:
9900 {
9901 Assert(paFixups[i].off < off);
9902 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
9903 Assert(offDisp >= -8192 && offDisp < 8192);
9904 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
9905 continue;
9906 }
9907
9908#endif
9909 case kIemNativeFixupType_Invalid:
9910 case kIemNativeFixupType_End:
9911 break;
9912 }
9913 AssertFailed();
9914 }
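    /*
     * Illustrative sketch only (the helper name and standalone form are hypothetical,
     * not part of IEM): how the ARM64 kIemNativeFixupType_RelImm26At0 case above
     * resolves.  B/BL encode a signed 26-bit instruction-count displacement in
     * bits [25:0], so the fixup keeps the opcode bits and masks in the offset:
     *
     *      static uint32_t iemEditorSketchPatchImm26(uint32_t uInsn, int32_t offDispInsns)
     *      {
     *          return (uInsn & UINT32_C(0xfc000000)) | ((uint32_t)offDispInsns & UINT32_C(0x03ffffff));
     *      }
     *
     *      // E.g. a branch at instruction index 10 targeting a label at index 4:
     *      // offDisp = 4 - 10 = -6  ->  imm26 field = 0x03fffffa.
     */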
9915
9916 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
9917 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
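    /*
     * For comparison, a minimal generic sketch of the same publish pattern
     * (allocate, copy, fix up, make executable, flush the instruction cache)
     * using plain POSIX/compiler primitives instead of the IEM executable
     * memory allocator; all names and sizes below are illustrative assumptions:
     *
     *      void *pvExec = mmap(NULL, cb, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
     *      memcpy(pvExec, pvGenerated, cb);
     *      // ... apply relocations against pvExec ...
     *      mprotect(pvExec, cb, PROT_READ | PROT_EXEC);
     *      __builtin___clear_cache((char *)pvExec, (char *)pvExec + cb); // required on ARM
     */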
9918
9919 /*
9920 * Convert the translation block.
9921 */
9922 RTMemFree(pTb->Thrd.paCalls);
9923 pTb->Native.paInstructions = paFinalInstrBuf;
9924 pTb->Native.cInstructions = off;
9925 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
9926#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9927    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
9928 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
9929#endif
9930
9931 Assert(pTbAllocator->cThreadedTbs > 0);
9932 pTbAllocator->cThreadedTbs -= 1;
9933 pTbAllocator->cNativeTbs += 1;
9934 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
9935
9936#ifdef LOG_ENABLED
9937 /*
9938 * Disassemble to the log if enabled.
9939 */
9940 if (LogIs3Enabled())
9941 {
9942 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
9943 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
9944# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
9945 RTLogFlush(NULL);
9946# endif
9947 }
9948#endif
9949 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
9950
9951 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
9952 return pTb;
9953}
9954