VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 104112

Last change on this file since 104112 was 104112, checked in by vboxsync, 8 months ago

VMM/IEM: build fix. bugref:10370

1/* $Id: IEMAllN8veRecompiler.cpp 104112 2024-03-28 23:57:19Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/mem.h>
62#include <iprt/string.h>
63#if defined(RT_ARCH_AMD64)
64# include <iprt/x86.h>
65#elif defined(RT_ARCH_ARM64)
66# include <iprt/armv8.h>
67#endif
68
69#ifdef RT_OS_WINDOWS
70# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
71extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
73#else
74# include <iprt/formats/dwarf.h>
75# if defined(RT_OS_DARWIN)
76# include <libkern/OSCacheControl.h>
77# define IEMNATIVE_USE_LIBUNWIND
78extern "C" void __register_frame(const void *pvFde);
79extern "C" void __deregister_frame(const void *pvFde);
80# else
81# ifdef DEBUG_bird /** @todo not thread safe yet */
82# define IEMNATIVE_USE_GDB_JIT
83# endif
84# ifdef IEMNATIVE_USE_GDB_JIT
85# include <iprt/critsect.h>
86# include <iprt/once.h>
87# include <iprt/formats/elf64.h>
88# endif
89extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
90extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
91# endif
92#endif
93#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
94# include "/opt/local/include/capstone/capstone.h"
95#endif
96
97#include "IEMInline.h"
98#include "IEMThreadedFunctions.h"
99#include "IEMN8veRecompiler.h"
100#include "IEMN8veRecompilerEmit.h"
101#include "IEMN8veRecompilerTlbLookup.h"
102#include "IEMNativeFunctions.h"
103
104
105/*
106 * Narrow down configs here to avoid wasting time on unused ones.
107 * Note! Same checks in IEMAllThrdRecompiler.cpp.
108 */
109
110#ifndef IEM_WITH_CODE_TLB
111# error The code TLB must be enabled for the recompiler.
112#endif
113
114#ifndef IEM_WITH_DATA_TLB
115# error The data TLB must be enabled for the recompiler.
116#endif
117
118#ifndef IEM_WITH_SETJMP
119# error The setjmp approach must be enabled for the recompiler.
120#endif
121
122/** @todo eliminate this clang build hack. */
123#if RT_CLANG_PREREQ(4, 0)
124# pragma GCC diagnostic ignored "-Wunused-function"
125#endif
126
127
128/*********************************************************************************************************************************
129* Internal Functions *
130*********************************************************************************************************************************/
131#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
132static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
133#endif
134DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
135DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
136 IEMNATIVEGSTREG enmGstReg, uint32_t off);
137DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
138
139
140/*********************************************************************************************************************************
141* Executable Memory Allocator *
142*********************************************************************************************************************************/
143/** The chunk sub-allocation unit size in bytes. */
144#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
145/** The chunk sub-allocation unit size as a shift factor. */
146#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
147/** Enables adding a header to the sub-allocator allocations.
148 * This is useful for freeing up executable memory among other things. */
149#define IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
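/* Example (informal): with the 128 byte unit size above and the 16 byte
 * IEMEXECMEMALLOCHDR header (64-bit host), a 300 byte request rounds up to
 * (300 + 16 + 127) >> 7 = 3 units, i.e. 384 bytes of the chunk. */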
150
151
152#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
153# ifdef IEMNATIVE_USE_GDB_JIT
154# define IEMNATIVE_USE_GDB_JIT_ET_DYN
155
156/** GDB JIT: Code entry. */
157typedef struct GDBJITCODEENTRY
158{
159 struct GDBJITCODEENTRY *pNext;
160 struct GDBJITCODEENTRY *pPrev;
161 uint8_t *pbSymFile;
162 uint64_t cbSymFile;
163} GDBJITCODEENTRY;
164
165/** GDB JIT: Actions. */
166typedef enum GDBJITACTIONS : uint32_t
167{
168 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
169} GDBJITACTIONS;
170
171/** GDB JIT: Descriptor. */
172typedef struct GDBJITDESCRIPTOR
173{
174 uint32_t uVersion;
175 GDBJITACTIONS enmAction;
176 GDBJITCODEENTRY *pRelevant;
177 GDBJITCODEENTRY *pHead;
178 /** Our addition: */
179 GDBJITCODEENTRY *pTail;
180} GDBJITDESCRIPTOR;
181
182/** GDB JIT: Our simple symbol file data. */
183typedef struct GDBJITSYMFILE
184{
185 Elf64_Ehdr EHdr;
186# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
187 Elf64_Shdr aShdrs[5];
188# else
189 Elf64_Shdr aShdrs[7];
190 Elf64_Phdr aPhdrs[2];
191# endif
192 /** The dwarf ehframe data for the chunk. */
193 uint8_t abEhFrame[512];
194 char szzStrTab[128];
195 Elf64_Sym aSymbols[3];
196# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
197 Elf64_Sym aDynSyms[2];
198 Elf64_Dyn aDyn[6];
199# endif
200} GDBJITSYMFILE;
201
202extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
203extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
204
205/** Init once for g_IemNativeGdbJitLock. */
206static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
207/** Critical section serializing GDB JIT registration. */
208static RTCRITSECT g_IemNativeGdbJitLock;
209
210/** GDB reads the info here. */
211GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
212
213/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
214DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
215{
216 ASMNopPause();
217}
218
219/** @callback_method_impl{FNRTONCE} */
220static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
221{
222 RT_NOREF(pvUser);
223 return RTCritSectInit(&g_IemNativeGdbJitLock);
224}
225
226
227# endif /* IEMNATIVE_USE_GDB_JIT */
228
229/**
230 * Per-chunk unwind info for non-windows hosts.
231 */
232typedef struct IEMEXECMEMCHUNKEHFRAME
233{
234# ifdef IEMNATIVE_USE_LIBUNWIND
235 /** The offset of the FDA into abEhFrame. */
236 uintptr_t offFda;
237# else
238 /** 'struct object' storage area. */
239 uint8_t abObject[1024];
240# endif
241# ifdef IEMNATIVE_USE_GDB_JIT
242# if 0
243 /** The GDB JIT 'symbol file' data. */
244 GDBJITSYMFILE GdbJitSymFile;
245# endif
246 /** The GDB JIT list entry. */
247 GDBJITCODEENTRY GdbJitEntry;
248# endif
249 /** The dwarf ehframe data for the chunk. */
250 uint8_t abEhFrame[512];
251} IEMEXECMEMCHUNKEHFRAME;
252/** Pointer to per-chunk unwind info for non-windows hosts. */
253typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
254#endif
255
256
257/**
258 * A chunk of executable memory.
259 */
260typedef struct IEMEXECMEMCHUNK
261{
262 /** Number of free items in this chunk. */
263 uint32_t cFreeUnits;
264 /** Hint where to start searching for free space in the allocation bitmap. */
265 uint32_t idxFreeHint;
266 /** Pointer to the chunk. */
267 void *pvChunk;
268#ifdef IN_RING3
269 /**
270 * Pointer to the unwind information.
271 *
272 * This is used during C++ throw and longjmp (windows and probably most other
273 * platforms). Some debuggers (windbg) make use of it as well.
274 *
275 * Windows: This is allocated from hHeap on windows because (at least for
276 * AMD64) the UNWIND_INFO structure address in the
277 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
278 *
279 * Others: Allocated from the regular heap to avoid unnecessary executable data
280 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
281 void *pvUnwindInfo;
282#elif defined(IN_RING0)
283 /** Allocation handle. */
284 RTR0MEMOBJ hMemObj;
285#endif
286} IEMEXECMEMCHUNK;
287/** Pointer to a memory chunk. */
288typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
289
290
291/**
292 * Executable memory allocator for the native recompiler.
293 */
294typedef struct IEMEXECMEMALLOCATOR
295{
296 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
297 uint32_t uMagic;
298
299 /** The chunk size. */
300 uint32_t cbChunk;
301 /** The maximum number of chunks. */
302 uint32_t cMaxChunks;
303 /** The current number of chunks. */
304 uint32_t cChunks;
305 /** Hint where to start looking for available memory. */
306 uint32_t idxChunkHint;
307 /** Statistics: Current number of allocations. */
308 uint32_t cAllocations;
309
310 /** The total amount of memory available. */
311 uint64_t cbTotal;
312 /** Total amount of free memory. */
313 uint64_t cbFree;
314 /** Total amount of memory allocated. */
315 uint64_t cbAllocated;
316
317 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
318 *
319 * Since the chunk size is a power of two and the minimum chunk size is a lot
320 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
321 * require a whole number of uint64_t elements in the allocation bitmap. So,
322 * for the sake of simplicity/laziness, the bitmaps for all chunks are
323 * allocated as one contiguous block. */
324 uint64_t *pbmAlloc;
325 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
326 uint32_t cUnitsPerChunk;
327 /** Number of bitmap elements per chunk (for quickly locating the bitmap
328 * portion corresponding to a chunk). */
329 uint32_t cBitmapElementsPerChunk;
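    /* Example (informal): a 64 MiB chunk with 128 byte units gives 64M / 128 = 512K
     * units and thus 512K / 64 = 8192 uint64_t bitmap elements per chunk. */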
330#ifdef VBOX_WITH_STATISTICS
331 STAMPROFILE StatAlloc;
332#endif
333
334#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
335 /** Pointer to the array of unwind info running parallel to aChunks (same
336 * allocation as this structure, located after the bitmaps).
337 * (For Windows, the structures must reside within 32-bit RVA distance of the
338 * actual chunk, so they are allocated off the chunk.) */
339 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
340#endif
341
342 /** The allocation chunks. */
343 RT_FLEXIBLE_ARRAY_EXTENSION
344 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
345} IEMEXECMEMALLOCATOR;
346/** Pointer to an executable memory allocator. */
347typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
348
349/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
350#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
351
352
353#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
354/**
355 * Allocation header.
356 */
357typedef struct IEMEXECMEMALLOCHDR
358{
359 /** Magic value / eyecatcher (IEMEXECMEMALLOCHDR_MAGIC). */
360 uint32_t uMagic;
361 /** The allocation chunk (for speeding up freeing). */
362 uint32_t idxChunk;
363 /** Pointer to the translation block the allocation belongs to.
364 * This is the whole point of the header. */
365 PIEMTB pTb;
366} IEMEXECMEMALLOCHDR;
367/** Pointer to an allocation header. */
368typedef IEMEXECMEMALLOCHDR *PIEMEXECMEMALLOCHDR;
369/** Magic value for IEMEXECMEMALLOCHDR ('ExeM'). */
370# define IEMEXECMEMALLOCHDR_MAGIC UINT32_C(0x4d657845)
371#endif
372
373
374static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
375
376
377/**
378 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
379 * the heap statistics.
380 */
381static void *iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
382 uint32_t cbReq, uint32_t idxChunk)
383{
384 pExecMemAllocator->cAllocations += 1;
385 pExecMemAllocator->cbAllocated += cbReq;
386 pExecMemAllocator->cbFree -= cbReq;
387 pExecMemAllocator->idxChunkHint = idxChunk;
388
389#ifdef RT_OS_DARWIN
390 /*
391 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
392 * on darwin. So, we mark the pages returned as read+write after alloc and
393 * expect the caller to call iemExecMemAllocatorReadyForUse when done
394 * writing to the allocation.
395 *
396 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
397 * for details.
398 */
399 /** @todo detect if this is necessary... it wasn't required on 10.15 or
400 * whatever older version it was. */
401 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
402 AssertRC(rc);
403#endif
404
405 return pvRet;
406}
407
408
409static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
410 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk, PIEMTB pTb)
411{
412 /*
413 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
414 */
415 Assert(!(cToScan & 63));
416 Assert(!(idxFirst & 63));
417 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
418 pbmAlloc += idxFirst / 64;
419
420 /*
421 * Scan the bitmap for cReqUnits consecutive clear bits
422 */
423 /** @todo This can probably be done more efficiently for non-x86 systems. */
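    /* Example (informal): with cReqUnits = 3 and bits 0..1 set but bits 2..4 clear,
     * ASMBitFirstClear returns 2, the loop below confirms bits 3 and 4 are clear
     * too, and ASMBitSetRange then marks units [2..5) of this window as allocated. */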
424 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
425 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
426 {
427 uint32_t idxAddBit = 1;
428 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
429 idxAddBit++;
430 if (idxAddBit >= cReqUnits)
431 {
432 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
433
434 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
435 pChunk->cFreeUnits -= cReqUnits;
436 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
437
438# ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
439 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)((uint8_t *)pChunk->pvChunk
440 + ( (idxFirst + (uint32_t)iBit)
441 << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));
442 pHdr->uMagic = IEMEXECMEMALLOCHDR_MAGIC;
443 pHdr->idxChunk = idxChunk;
444 pHdr->pTb = pTb;
445 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pHdr + 1,
446 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
447#else
448 RT_NOREF(pTb);
449 void * const pvRet = (uint8_t *)pChunk->pvChunk
450 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
451 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
452 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
453#endif
454 }
455
456 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
457 }
458 return NULL;
459}
460
461
462static void *
463iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq, PIEMTB pTb)
464{
465 /*
466 * Figure out how much to allocate.
467 */
468#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
469 uint32_t const cReqUnits = (cbReq + sizeof(IEMEXECMEMALLOCHDR) + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
470#else
471 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)
472#endif
473 >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
474 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
475 {
476 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
477 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
478 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
479 {
480 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
481 pExecMemAllocator->cUnitsPerChunk - idxHint,
482 cReqUnits, idxChunk, pTb);
483 if (pvRet)
484 return pvRet;
485 }
486 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
487 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
488 cReqUnits, idxChunk, pTb);
489 }
490 return NULL;
491}
492
493
494/**
495 * Allocates @a cbReq bytes of executable memory.
496 *
497 * @returns Pointer to the memory, NULL if out of memory or other problem
498 * encountered.
499 * @param pVCpu The cross context virtual CPU structure of the calling
500 * thread.
501 * @param cbReq How many bytes are required.
502 * @param pTb The translation block that will be using the allocation.
503 */
504static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq, PIEMTB pTb)
505{
506 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
507 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
508 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
509 STAM_PROFILE_START(&pExecMemAllocator->StatAlloc, a);
510
511 for (unsigned iIteration = 0;; iIteration++)
512 {
513 if (cbReq <= pExecMemAllocator->cbFree)
514 {
515 uint32_t const cChunks = pExecMemAllocator->cChunks;
516 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
517 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
518 {
519 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
520 if (pvRet)
521 {
522 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
523 return pvRet;
524 }
525 }
526 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
527 {
528 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
529 if (pvRet)
530 {
531 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
532 return pvRet;
533 }
534 }
535 }
536
537 /*
538 * Can we grow it with another chunk?
539 */
540 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
541 {
542 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
543 AssertLogRelRCReturn(rc, NULL);
544
545 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
546 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq, pTb);
547 if (pvRet)
548 {
549 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
550 return pvRet;
551 }
552 AssertFailed();
553 }
554
555 /*
556 * Try prune native TBs once.
557 */
558 if (iIteration == 0)
559 {
560 /* No header included in the instruction count here. */
561 uint32_t const cNeededInstrs = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) / sizeof(IEMNATIVEINSTR);
562 iemTbAllocatorFreeupNativeSpace(pVCpu, cNeededInstrs);
563 }
564 else
565 {
566 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeExecMemInstrBufAllocFailed);
567 STAM_PROFILE_STOP(&pExecMemAllocator->StatAlloc, a);
568 return NULL;
569 }
570 }
571}
572
573
574/** This is a hook that we may need later for changing memory protection back
575 * to readonly+exec */
576static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
577{
578#ifdef RT_OS_DARWIN
579 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
580 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
581 AssertRC(rc); RT_NOREF(pVCpu);
582
583 /*
584 * Flush the instruction cache:
585 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
586 */
587 /* sys_dcache_flush(pv, cb); - not necessary */
588 sys_icache_invalidate(pv, cb);
589#else
590 RT_NOREF(pVCpu, pv, cb);
591#endif
592}
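/* Informal usage sketch (pabCode/cbCode are hypothetical caller buffers): native
 * code is emitted while the allocation is still writable and only then handed to
 * iemExecMemAllocatorReadyForUse, which on darwin flips the pages back to
 * read+exec and invalidates the instruction cache:
 *
 *     void * const pvExec = iemExecMemAllocatorAlloc(pVCpu, cbCode, pTb);
 *     if (pvExec)
 *     {
 *         memcpy(pvExec, pabCode, cbCode);
 *         iemExecMemAllocatorReadyForUse(pVCpu, pvExec, cbCode);
 *     }
 */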
593
594
595/**
596 * Frees executable memory.
597 */
598void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
599{
600 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
601 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
602 AssertPtr(pv);
603#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
604 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
605
606 /* Align the size as we did when allocating the block. */
607 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
608
609#else
610 PIEMEXECMEMALLOCHDR pHdr = (PIEMEXECMEMALLOCHDR)pv - 1;
611 Assert(!((uintptr_t)pHdr & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
612 AssertReturnVoid(pHdr->uMagic == IEMEXECMEMALLOCHDR_MAGIC);
613 uint32_t const idxChunk = pHdr->idxChunk;
614 AssertReturnVoid(idxChunk < pExecMemAllocator->cChunks);
615 pv = pHdr;
616
617 /* Adjust and align the size to cover the whole allocation area. */
618 cb = RT_ALIGN_Z(cb + sizeof(*pHdr), IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
619#endif
620
621 /* Free it / assert sanity. */
622 bool fFound = false;
623 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
624#ifndef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
625 uint32_t const cChunks = pExecMemAllocator->cChunks;
626 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
627#endif
628 {
629 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
630 fFound = offChunk < cbChunk;
631 if (fFound)
632 {
633 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
634 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
635
636 /* Check that it's valid and free it. */
637 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
638 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
639 for (uint32_t i = 1; i < cReqUnits; i++)
640 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
641 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
642#ifdef IEMEXECMEM_ALT_SUB_WITH_ALLOC_HEADER
643 pHdr->uMagic = 0;
644 pHdr->idxChunk = 0;
645 pHdr->pTb = NULL;
646#endif
647 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
648 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
649
650 /* Update the stats. */
651 pExecMemAllocator->cbAllocated -= cb;
652 pExecMemAllocator->cbFree += cb;
653 pExecMemAllocator->cAllocations -= 1;
654 return;
655 }
656 }
657 AssertFailed();
658}
659
660
661
662#ifdef IN_RING3
663# ifdef RT_OS_WINDOWS
664
665/**
666 * Initializes the unwind info structures for windows hosts.
667 */
668static int
669iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
670 void *pvChunk, uint32_t idxChunk)
671{
672 RT_NOREF(pVCpu);
673
674 /*
675 * The AMD64 unwind opcodes.
676 *
677 * This is a program that starts with RSP after a RET instruction that
678 * ends up in recompiled code, and the operations we describe here will
679 * restore all non-volatile registers and bring RSP back to where our
680 * RET address is. This means it's reverse order from what happens in
681 * the prologue.
682 *
683 * Note! Using a frame register approach here both because we have one
684 * and mainly because the UWOP_ALLOC_LARGE argument values
685 * would be a pain to write initializers for. On the positive
686 * side, we're impervious to changes in the stack variable
687 * area and can deal with dynamic stack allocations if necessary.
688 */
689 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
690 {
691 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
692 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
693 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
694 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
695 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
696 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
697 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
698 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
699 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
700 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
701 };
702 union
703 {
704 IMAGE_UNWIND_INFO Info;
705 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
706 } s_UnwindInfo =
707 {
708 {
709 /* .Version = */ 1,
710 /* .Flags = */ 0,
711 /* .SizeOfProlog = */ 16, /* whatever */
712 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
713 /* .FrameRegister = */ X86_GREG_xBP,
714 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
715 }
716 };
717 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
718 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
719
720 /*
721 * Calc how much space we need and allocate it off the exec heap.
722 */
723 unsigned const cFunctionEntries = 1;
724 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
725 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
726 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
727 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeeded, NULL);
728 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
729 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
730
731 /*
732 * Initialize the structures.
733 */
734 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
735
736 paFunctions[0].BeginAddress = 0;
737 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
738 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
739
740 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
741 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
742
743 /*
744 * Register it.
745 */
746 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
747 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
748
749 return VINF_SUCCESS;
750}
751
752
753# else /* !RT_OS_WINDOWS */
754
755/**
756 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
757 */
758DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
759{
760 if (iValue >= 64)
761 {
762 Assert(iValue < 0x2000);
763 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
764 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
765 }
766 else if (iValue >= 0)
767 *Ptr.pb++ = (uint8_t)iValue;
768 else if (iValue > -64)
769 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
770 else
771 {
772 Assert(iValue > -0x2000);
773 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
774 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
775 }
776 return Ptr;
777}
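/* Example (informal): this limited signed LEB128 encoder produces 0x02 for 2,
 * 0x77 for -9, 0xC8 0x01 for 200 and 0xB8 0x7E for -200, matching standard
 * SLEB128 output within its +/-0x2000 range. */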
778
779
780/**
781 * Emits an ULEB128 encoded value (up to 64-bit wide).
782 */
783DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
784{
785 while (uValue >= 0x80)
786 {
787 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
788 uValue >>= 7;
789 }
790 *Ptr.pb++ = (uint8_t)uValue;
791 return Ptr;
792}
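/* Example (informal): 127 encodes as the single byte 0x7F, 128 as 0x80 0x01, and
 * the classic DWARF spec value 624485 as 0xE5 0x8E 0x26. */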
793
794
795/**
796 * Emits a CFA rule as register @a uReg + offset @a off.
797 */
798DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
799{
800 *Ptr.pb++ = DW_CFA_def_cfa;
801 Ptr = iemDwarfPutUleb128(Ptr, uReg);
802 Ptr = iemDwarfPutUleb128(Ptr, off);
803 return Ptr;
804}
805
806
807/**
808 * Emits a register (@a uReg) save location:
809 * CFA + @a off * data_alignment_factor
810 */
811DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
812{
813 if (uReg < 0x40)
814 *Ptr.pb++ = DW_CFA_offset | uReg;
815 else
816 {
817 *Ptr.pb++ = DW_CFA_offset_extended;
818 Ptr = iemDwarfPutUleb128(Ptr, uReg);
819 }
820 Ptr = iemDwarfPutUleb128(Ptr, off);
821 return Ptr;
822}
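/* Example (informal): with the data alignment factor of -8 used in the CIE below,
 * iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) emits the two bytes 0x86 0x02
 * (DW_CFA_offset | 6 followed by ULEB128 2), i.e. RBP was saved at CFA - 16. */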
823
824
825# if 0 /* unused */
826/**
827 * Emits a register (@a uReg) save location, using signed offset:
828 * CFA + @a offSigned * data_alignment_factor
829 */
830DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
831{
832 *Ptr.pb++ = DW_CFA_offset_extended_sf;
833 Ptr = iemDwarfPutUleb128(Ptr, uReg);
834 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
835 return Ptr;
836}
837# endif
838
839
840/**
841 * Initializes the unwind info section for non-windows hosts.
842 */
843static int
844iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
845 void *pvChunk, uint32_t idxChunk)
846{
847 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
848 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
849
850 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
851
852 /*
853 * Generate the CIE first.
854 */
855# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
856 uint8_t const iDwarfVer = 3;
857# else
858 uint8_t const iDwarfVer = 4;
859# endif
860 RTPTRUNION const PtrCie = Ptr;
861 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
862 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
863 *Ptr.pb++ = iDwarfVer; /* DWARF version */
864 *Ptr.pb++ = 0; /* Augmentation. */
865 if (iDwarfVer >= 4)
866 {
867 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
868 *Ptr.pb++ = 0; /* Segment selector size. */
869 }
870# ifdef RT_ARCH_AMD64
871 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
872# else
873 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
874# endif
875 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
876# ifdef RT_ARCH_AMD64
877 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
878# elif defined(RT_ARCH_ARM64)
879 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
880# else
881# error "port me"
882# endif
883 /* Initial instructions: */
884# ifdef RT_ARCH_AMD64
885 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
886 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
887 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
888 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
889 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
890 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
891 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
892 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
893# elif defined(RT_ARCH_ARM64)
894# if 1
895 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
896# else
897 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
898# endif
899 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
900 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
901 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
902 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
903 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
904 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
905 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
906 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
907 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
908 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
909 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
910 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
911 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
912 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
913# else
914# error "port me"
915# endif
916 while ((Ptr.u - PtrCie.u) & 3)
917 *Ptr.pb++ = DW_CFA_nop;
918 /* Finalize the CIE size. */
919 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
920
921 /*
922 * Generate an FDE for the whole chunk area.
923 */
924# ifdef IEMNATIVE_USE_LIBUNWIND
925 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
926# endif
927 RTPTRUNION const PtrFde = Ptr;
928 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
929 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
930 Ptr.pu32++;
931 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
932 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
933# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
934 *Ptr.pb++ = DW_CFA_nop;
935# endif
936 while ((Ptr.u - PtrFde.u) & 3)
937 *Ptr.pb++ = DW_CFA_nop;
938 /* Finalize the FDE size. */
939 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
940
941 /* Terminator entry. */
942 *Ptr.pu32++ = 0;
943 *Ptr.pu32++ = 0; /* just to be sure... */
944 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
945
946 /*
947 * Register it.
948 */
949# ifdef IEMNATIVE_USE_LIBUNWIND
950 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
951# else
952 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
953 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
954# endif
955
956# ifdef IEMNATIVE_USE_GDB_JIT
957 /*
958 * Now for telling GDB about this (experimental).
959 *
960 * This seems to work best with ET_DYN.
961 */
962 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk,
963 sizeof(GDBJITSYMFILE), NULL);
964 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
965 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
966
967 RT_ZERO(*pSymFile);
968
969 /*
970 * The ELF header:
971 */
972 pSymFile->EHdr.e_ident[0] = ELFMAG0;
973 pSymFile->EHdr.e_ident[1] = ELFMAG1;
974 pSymFile->EHdr.e_ident[2] = ELFMAG2;
975 pSymFile->EHdr.e_ident[3] = ELFMAG3;
976 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
977 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
978 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
979 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
980# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
981 pSymFile->EHdr.e_type = ET_DYN;
982# else
983 pSymFile->EHdr.e_type = ET_REL;
984# endif
985# ifdef RT_ARCH_AMD64
986 pSymFile->EHdr.e_machine = EM_AMD64;
987# elif defined(RT_ARCH_ARM64)
988 pSymFile->EHdr.e_machine = EM_AARCH64;
989# else
990# error "port me"
991# endif
992 pSymFile->EHdr.e_version = 1; /*?*/
993 pSymFile->EHdr.e_entry = 0;
994# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
995 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
996# else
997 pSymFile->EHdr.e_phoff = 0;
998# endif
999 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1000 pSymFile->EHdr.e_flags = 0;
1001 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1002# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1003 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1004 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1005# else
1006 pSymFile->EHdr.e_phentsize = 0;
1007 pSymFile->EHdr.e_phnum = 0;
1008# endif
1009 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1010 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1011 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1012
1013 uint32_t offStrTab = 0;
1014#define APPEND_STR(a_szStr) do { \
1015 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1016 offStrTab += sizeof(a_szStr); \
1017 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1018 } while (0)
1019#define APPEND_STR_FMT(a_szStr, ...) do { \
1020 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1021 offStrTab++; \
1022 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1023 } while (0)
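/* Example (informal): APPEND_STR(".eh_frame") copies the string including its NUL
 * terminator and advances offStrTab by sizeof(".eh_frame") = 10, so an sh_name
 * assigned just before the append points at the start of that string. */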
1024
1025 /*
1026 * Section headers.
1027 */
1028 /* Section header #0: NULL */
1029 unsigned i = 0;
1030 APPEND_STR("");
1031 RT_ZERO(pSymFile->aShdrs[i]);
1032 i++;
1033
1034 /* Section header: .eh_frame */
1035 pSymFile->aShdrs[i].sh_name = offStrTab;
1036 APPEND_STR(".eh_frame");
1037 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1038 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1039# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1040 pSymFile->aShdrs[i].sh_offset
1041 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1042# else
1043 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1044 pSymFile->aShdrs[i].sh_offset = 0;
1045# endif
1046
1047 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1048 pSymFile->aShdrs[i].sh_link = 0;
1049 pSymFile->aShdrs[i].sh_info = 0;
1050 pSymFile->aShdrs[i].sh_addralign = 1;
1051 pSymFile->aShdrs[i].sh_entsize = 0;
1052 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1053 i++;
1054
1055 /* Section header: .shstrtab */
1056 unsigned const iShStrTab = i;
1057 pSymFile->EHdr.e_shstrndx = iShStrTab;
1058 pSymFile->aShdrs[i].sh_name = offStrTab;
1059 APPEND_STR(".shstrtab");
1060 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1061 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1062# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1063 pSymFile->aShdrs[i].sh_offset
1064 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1065# else
1066 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1067 pSymFile->aShdrs[i].sh_offset = 0;
1068# endif
1069 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1070 pSymFile->aShdrs[i].sh_link = 0;
1071 pSymFile->aShdrs[i].sh_info = 0;
1072 pSymFile->aShdrs[i].sh_addralign = 1;
1073 pSymFile->aShdrs[i].sh_entsize = 0;
1074 i++;
1075
1076 /* Section header: .symtab */
1077 pSymFile->aShdrs[i].sh_name = offStrTab;
1078 APPEND_STR(".symtab");
1079 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1080 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1081 pSymFile->aShdrs[i].sh_offset
1082 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1083 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1084 pSymFile->aShdrs[i].sh_link = iShStrTab;
1085 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1086 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1087 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1088 i++;
1089
1090# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1091 /* Section header: .dynsym */
1092 pSymFile->aShdrs[i].sh_name = offStrTab;
1093 APPEND_STR(".dynsym");
1094 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1095 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1096 pSymFile->aShdrs[i].sh_offset
1097 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1098 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1099 pSymFile->aShdrs[i].sh_link = iShStrTab;
1100 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1101 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1102 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1103 i++;
1104# endif
1105
1106# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1107 /* Section header: .dynamic */
1108 pSymFile->aShdrs[i].sh_name = offStrTab;
1109 APPEND_STR(".dynamic");
1110 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1111 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1112 pSymFile->aShdrs[i].sh_offset
1113 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1114 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1115 pSymFile->aShdrs[i].sh_link = iShStrTab;
1116 pSymFile->aShdrs[i].sh_info = 0;
1117 pSymFile->aShdrs[i].sh_addralign = 1;
1118 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1119 i++;
1120# endif
1121
1122 /* Section header: .text */
1123 unsigned const iShText = i;
1124 pSymFile->aShdrs[i].sh_name = offStrTab;
1125 APPEND_STR(".text");
1126 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1127 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1128# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1129 pSymFile->aShdrs[i].sh_offset
1130 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1131# else
1132 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1133 pSymFile->aShdrs[i].sh_offset = 0;
1134# endif
1135 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1136 pSymFile->aShdrs[i].sh_link = 0;
1137 pSymFile->aShdrs[i].sh_info = 0;
1138 pSymFile->aShdrs[i].sh_addralign = 1;
1139 pSymFile->aShdrs[i].sh_entsize = 0;
1140 i++;
1141
1142 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1143
1144# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1145 /*
1146 * The program headers:
1147 */
1148 /* Everything in a single LOAD segment: */
1149 i = 0;
1150 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1151 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1152 pSymFile->aPhdrs[i].p_offset
1153 = pSymFile->aPhdrs[i].p_vaddr
1154 = pSymFile->aPhdrs[i].p_paddr = 0;
1155 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1156 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1157 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1158 i++;
1159 /* The .dynamic segment. */
1160 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1161 pSymFile->aPhdrs[i].p_flags = PF_R;
1162 pSymFile->aPhdrs[i].p_offset
1163 = pSymFile->aPhdrs[i].p_vaddr
1164 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1165 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1166 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1167 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1168 i++;
1169
1170 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1171
1172 /*
1173 * The dynamic section:
1174 */
1175 i = 0;
1176 pSymFile->aDyn[i].d_tag = DT_SONAME;
1177 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1178 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1179 i++;
1180 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1181 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1182 i++;
1183 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1184 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1185 i++;
1186 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1187 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1188 i++;
1189 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1190 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1191 i++;
1192 pSymFile->aDyn[i].d_tag = DT_NULL;
1193 i++;
1194 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1195# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1196
1197 /*
1198 * Symbol tables:
1199 */
1200 /** @todo gdb doesn't seem to really like this ... */
1201 i = 0;
1202 pSymFile->aSymbols[i].st_name = 0;
1203 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1204 pSymFile->aSymbols[i].st_value = 0;
1205 pSymFile->aSymbols[i].st_size = 0;
1206 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1207 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1208# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1209 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1210# endif
1211 i++;
1212
1213 pSymFile->aSymbols[i].st_name = 0;
1214 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1215 pSymFile->aSymbols[i].st_value = 0;
1216 pSymFile->aSymbols[i].st_size = 0;
1217 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1218 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1219 i++;
1220
1221 pSymFile->aSymbols[i].st_name = offStrTab;
1222 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1223# if 0
1224 pSymFile->aSymbols[i].st_shndx = iShText;
1225 pSymFile->aSymbols[i].st_value = 0;
1226# else
1227 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1228 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1229# endif
1230 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1231 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1232 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1233# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1234 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1235 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1236# endif
1237 i++;
1238
1239 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1240 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1241
1242 /*
1243 * The GDB JIT entry and informing GDB.
1244 */
1245 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1246# if 1
1247 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1248# else
1249 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1250# endif
1251
1252 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1253 RTCritSectEnter(&g_IemNativeGdbJitLock);
1254 pEhFrame->GdbJitEntry.pNext = NULL;
1255 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1256 if (__jit_debug_descriptor.pTail)
1257 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1258 else
1259 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1260 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1261 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1262
1263 /* Notify GDB: */
1264 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1265 __jit_debug_register_code();
1266 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1267 RTCritSectLeave(&g_IemNativeGdbJitLock);
1268
1269# else /* !IEMNATIVE_USE_GDB_JIT */
1270 RT_NOREF(pVCpu);
1271# endif /* !IEMNATIVE_USE_GDB_JIT */
1272
1273 return VINF_SUCCESS;
1274}
1275
1276# endif /* !RT_OS_WINDOWS */
1277#endif /* IN_RING3 */
1278
1279
1280/**
1281 * Adds another chunk to the executable memory allocator.
1282 *
1283 * This is used by the init code for the initial allocation and later by the
1284 * regular allocator function when it's out of memory.
1285 */
1286static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1287{
1288 /* Check that we've room for growth. */
1289 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1290 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1291
1292 /* Allocate a chunk. */
1293#ifdef RT_OS_DARWIN
1294 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1295#else
1296 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1297#endif
1298 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1299
1300 /*
1301 * Add the chunk.
1302 *
1303 * This must be done before the unwind init so windows can allocate
1304 * memory from the chunk when using the alternative sub-allocator.
1305 */
1306 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1307#ifdef IN_RING3
1308 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1309#endif
1310 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1311 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1312 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1313 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1314
1315 pExecMemAllocator->cChunks = idxChunk + 1;
1316 pExecMemAllocator->idxChunkHint = idxChunk;
1317
1318 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1319 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1320
1321#ifdef IN_RING3
1322 /*
1323 * Initialize the unwind information (this cannot really fail atm).
1324 * (This sets pvUnwindInfo.)
1325 */
1326 int rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1327 if (RT_SUCCESS(rc))
1328 { /* likely */ }
1329 else
1330 {
1331 /* Just in case the impossible happens, undo the above: */
1332 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1333 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1334 pExecMemAllocator->cChunks = idxChunk;
1335 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1336 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1337 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1338 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1339
1340 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1341 return rc;
1342 }
1343#endif
1344 return VINF_SUCCESS;
1345}
1346
1347
1348/**
1349 * Initializes the executable memory allocator for native recompilation on the
1350 * calling EMT.
1351 *
1352 * @returns VBox status code.
1353 * @param pVCpu The cross context virtual CPU structure of the calling
1354 * thread.
1355 * @param cbMax The max size of the allocator.
1356 * @param cbInitial The initial allocator size.
1357 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1358 * dependent).
1359 */
1360int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1361{
1362 /*
1363 * Validate input.
1364 */
1365 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1366 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1367 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1368 || cbChunk == 0
1369 || ( RT_IS_POWER_OF_TWO(cbChunk)
1370 && cbChunk >= _1M
1371 && cbChunk <= _256M
1372 && cbChunk <= cbMax),
1373 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1374 VERR_OUT_OF_RANGE);
1375
1376 /*
1377 * Adjust/figure out the chunk size.
1378 */
1379 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1380 {
1381 if (cbMax >= _256M)
1382 cbChunk = _64M;
1383 else
1384 {
1385 if (cbMax < _16M)
1386 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1387 else
1388 cbChunk = (uint32_t)cbMax / 4;
1389 if (!RT_IS_POWER_OF_TWO(cbChunk))
1390 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1391 }
1392 }
1393
1394 if (cbChunk > cbMax)
1395 cbMax = cbChunk;
1396 else
1397 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1398 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1399 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
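    /* Example (informal): cbMax = 64 MiB with cbChunk = 0 gives cbChunk = 64M / 4
     * = 16 MiB (already a power of two), cbMax stays 64 MiB and cMaxChunks = 4;
     * cbMax = 512 MiB gives cbChunk = 64 MiB and cMaxChunks = 8. */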
1400
1401 /*
1402 * Allocate and initialize the allocator instance.
1403 */
1404 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1405 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1406 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1407 cbNeeded += cbBitmap * cMaxChunks;
1408 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1409 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1410#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1411 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1412 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1413#endif
1414 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1415 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1416 VERR_NO_MEMORY);
1417 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1418 pExecMemAllocator->cbChunk = cbChunk;
1419 pExecMemAllocator->cMaxChunks = cMaxChunks;
1420 pExecMemAllocator->cChunks = 0;
1421 pExecMemAllocator->idxChunkHint = 0;
1422 pExecMemAllocator->cAllocations = 0;
1423 pExecMemAllocator->cbTotal = 0;
1424 pExecMemAllocator->cbFree = 0;
1425 pExecMemAllocator->cbAllocated = 0;
1426 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1427 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1428 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1429 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1430#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1431 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1432#endif
1433 for (uint32_t i = 0; i < cMaxChunks; i++)
1434 {
1435 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1436 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1437 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1438#ifdef IN_RING0
1439 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1440#else
1441 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1442#endif
1443 }
1444 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1445
1446 /*
1447 * Do the initial allocations.
1448 */
1449 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1450 {
1451 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1452 AssertLogRelRCReturn(rc, rc);
1453 }
1454
1455 pExecMemAllocator->idxChunkHint = 0;
1456
1457 /*
1458 * Register statistics.
1459 */
1460 PUVM const pUVM = pVCpu->pUVCpu->pUVM;
1461 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cAllocations, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1462 "Current number of allocations", "/IEM/CPU%u/re/ExecMem/cAllocations", pVCpu->idCpu);
1463 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1464 "Currently allocated chunks", "/IEM/CPU%u/re/ExecMem/cChunks", pVCpu->idCpu);
1465 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cMaxChunks, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_COUNT,
1466 "Maximum number of chunks", "/IEM/CPU%u/re/ExecMem/cMaxChunks", pVCpu->idCpu);
1467 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbChunk, STAMTYPE_U32, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1468 "Allocation chunk size", "/IEM/CPU%u/re/ExecMem/cbChunk", pVCpu->idCpu);
1469 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbAllocated, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1470 "Number of bytes currently allocated", "/IEM/CPU%u/re/ExecMem/cbAllocated", pVCpu->idCpu);
1471 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbFree, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1472 "Number of bytes currently free", "/IEM/CPU%u/re/ExecMem/cbFree", pVCpu->idCpu);
1473 STAMR3RegisterFU(pUVM, &pExecMemAllocator->cbTotal, STAMTYPE_U64, STAMVISIBILITY_ALWAYS, STAMUNIT_BYTES,
1474 "Total number of bytes", "/IEM/CPU%u/re/ExecMem/cbTotal", pVCpu->idCpu);
1475#ifdef VBOX_WITH_STATISTICS
1476 STAMR3RegisterFU(pUVM, &pExecMemAllocator->StatAlloc, STAMTYPE_PROFILE, STAMVISIBILITY_ALWAYS, STAMUNIT_TICKS_PER_CALL,
1477 "Profiling the allocator", "/IEM/CPU%u/re/ExecMem/ProfAlloc", pVCpu->idCpu);
1478#endif
1479
1480 return VINF_SUCCESS;
1481}
1482
1483
1484/*********************************************************************************************************************************
1485* Native Recompilation *
1486*********************************************************************************************************************************/
1487
1488
1489/**
1490 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1491 */
1492IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1493{
1494 pVCpu->iem.s.cInstructions += idxInstr;
1495 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1496}
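/* Note on iemNativeHlpExecStatusCodeFiddling (a reading of the code, hedged): idxInstr is
   presumably the number of instructions the TB had already completed when the call was made,
   so adding it keeps the cInstructions statistics roughly honest even though the TB is being
   abandoned.  VINF_IEM_REEXEC_BREAK only means "stop executing this TB", hence it is folded
   into VINF_SUCCESS before the common status code fiddling is applied. */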
1497
1498
1499/**
1500 * Used by TB code when it wants to raise a \#DE.
1501 */
1502IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
1503{
1504 iemRaiseDivideErrorJmp(pVCpu);
1505#ifndef _MSC_VER
1506 return VINF_IEM_RAISED_XCPT; /* not reached */
1507#endif
1508}
1509
1510
1511/**
1512 * Used by TB code when it wants to raise a \#UD.
1513 */
1514IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1515{
1516 iemRaiseUndefinedOpcodeJmp(pVCpu);
1517#ifndef _MSC_VER
1518 return VINF_IEM_RAISED_XCPT; /* not reached */
1519#endif
1520}
1521
1522
1523/**
1524 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
1525 *
1526 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
1527 */
1528IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
1529{
1530 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
1531 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
1532 iemRaiseUndefinedOpcodeJmp(pVCpu);
1533 else
1534 iemRaiseDeviceNotAvailableJmp(pVCpu);
1535#ifndef _MSC_VER
1536 return VINF_IEM_RAISED_XCPT; /* not reached */
1537#endif
1538}
1539
1540
1541/**
1542 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
1543 *
1544 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
1545 */
1546IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
1547{
1548 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
1549 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
1550 iemRaiseUndefinedOpcodeJmp(pVCpu);
1551 else
1552 iemRaiseDeviceNotAvailableJmp(pVCpu);
1553#ifndef _MSC_VER
1554 return VINF_IEM_RAISED_XCPT; /* not reached */
1555#endif
1556}
1557
1558
1559/**
1560 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
1561 *
1562 * See IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT.
1563 */
1564IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
1565{
1566 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
1567 iemRaiseSimdFpExceptionJmp(pVCpu);
1568 else
1569 iemRaiseUndefinedOpcodeJmp(pVCpu);
1570#ifndef _MSC_VER
1571 return VINF_IEM_RAISED_XCPT; /* not reached */
1572#endif
1573}
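/* Note on the Raise*Related helpers above (an interpretation, not taken from the original
   comments): these are the out-of-line tails of the IEM_MC_MAYBE_RAISE_* checks.  The
   recompiled TB presumably tests the relevant CR0/CR4/XCR0 bits inline and only calls the
   helper once a check has failed; the helper then merely decides which exception (#UD, #NM
   or #XF) to deliver via the longjmp-based raise routines. */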
1574
1575
1576/**
1577 * Used by TB code when it wants to raise a \#NM.
1578 */
1579IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1580{
1581 iemRaiseDeviceNotAvailableJmp(pVCpu);
1582#ifndef _MSC_VER
1583 return VINF_IEM_RAISED_XCPT; /* not reached */
1584#endif
1585}
1586
1587
1588/**
1589 * Used by TB code when it wants to raise a \#GP(0).
1590 */
1591IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1592{
1593 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1594#ifndef _MSC_VER
1595 return VINF_IEM_RAISED_XCPT; /* not reached */
1596#endif
1597}
1598
1599
1600/**
1601 * Used by TB code when it wants to raise a \#MF.
1602 */
1603IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1604{
1605 iemRaiseMathFaultJmp(pVCpu);
1606#ifndef _MSC_VER
1607 return VINF_IEM_RAISED_XCPT; /* not reached */
1608#endif
1609}
1610
1611
1612/**
1613 * Used by TB code when it wants to raise a \#XF.
1614 */
1615IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1616{
1617 iemRaiseSimdFpExceptionJmp(pVCpu);
1618#ifndef _MSC_VER
1619 return VINF_IEM_RAISED_XCPT; /* not reached */
1620#endif
1621}
1622
1623
1624/**
1625 * Used by TB code when detecting opcode changes.
1626 * @see iemThreadedFuncWorkerObsoleteTb
1627 */
1628IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1629{
1630 /* We set fSafeToFree to false because we're being called in the context
1631 of a TB callback function, which for native TBs means we cannot release
1632 the executable memory until we've returned all the way back to iemTbExec,
1633 as that return path goes via the native code generated for the TB. */
1634 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1635 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1636 return VINF_IEM_REEXEC_BREAK;
1637}
1638
1639
1640/**
1641 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1642 */
1643IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1644{
1645 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1646 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1647 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1648 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1649 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1650 return VINF_IEM_REEXEC_BREAK;
1651}
1652
1653
1654/**
1655 * Used by TB code when we missed a PC check after a branch.
1656 */
1657IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1658{
1659 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1660 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1661 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1662 pVCpu->iem.s.pbInstrBuf));
1663 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1664 return VINF_IEM_REEXEC_BREAK;
1665}
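/* Note: the GCPhysWithOffset value logged above is derived as
   GCPhysInstrBuf + (CS.BASE + RIP - uInstrBufPc), i.e. the guest physical address that
   corresponds to the current RIP within the currently mapped instruction buffer window. */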
1666
1667
1668
1669/*********************************************************************************************************************************
1670* Helpers: Segmented memory fetches and stores. *
1671*********************************************************************************************************************************/
1672
1673/**
1674 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1675 */
1676IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1677{
1678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1679 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1680#else
1681 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1682#endif
1683}
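/* Note on the IEMNATIVE_WITH_TLB_LOOKUP_FETCH split used by the fetch helpers in this
   section (hedged, derived from the worker naming): when the define is active the TB
   presumably performs the data TLB lookup inline and only reaches these helpers on the
   slow path, hence the *SafeJmp workers; otherwise the helper carries out the whole fetch
   through the regular *Jmp workers, which do their own lookup. */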
1684
1685
1686/**
1687 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1688 * to 16 bits.
1689 */
1690IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1691{
1692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1693 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1694#else
1695 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1696#endif
1697}
1698
1699
1700/**
1701 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1702 * to 32 bits.
1703 */
1704IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1705{
1706#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1707 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1708#else
1709 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1710#endif
1711}
1712
1713/**
1714 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1715 * to 64 bits.
1716 */
1717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1718{
1719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1720 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1721#else
1722 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1723#endif
1724}
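/* The cast chains in the _Sx_ helpers first sign-extend to the target width and then
   zero-extend the result to 64 bits so the upper bits are well defined for the native code.
   E.g. for the U8 -> U32 case a value of 0x80 becomes:
       (int8_t)0x80          -> -128
       (int32_t)-128         -> 0xFFFFFF80
       (uint32_t)0xFFFFFF80  -> 0xFFFFFF80
       (uint64_t)0xFFFFFF80  -> 0x00000000FFFFFF80
   i.e. bits 63:32 are zero while bits 31:8 hold the sign extension. */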
1725
1726
1727/**
1728 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1729 */
1730IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1731{
1732#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1733 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1734#else
1735 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1736#endif
1737}
1738
1739
1740/**
1741 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1742 * to 32 bits.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1745{
1746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1747 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1748#else
1749 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1750#endif
1751}
1752
1753
1754/**
1755 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1756 * to 64 bits.
1757 */
1758IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1759{
1760#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1761 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1762#else
1763 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1764#endif
1765}
1766
1767
1768/**
1769 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1772{
1773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1774 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1775#else
1776 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1777#endif
1778}
1779
1780
1781/**
1782 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1783 * to 64 bits.
1784 */
1785IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1786{
1787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1788 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1789#else
1790 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1791#endif
1792}
1793
1794
1795/**
1796 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1799{
1800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1801 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1802#else
1803 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1804#endif
1805}
1806
1807
1808#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1809/**
1810 * Used by TB code to load 128-bit data w/ segmentation.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1816#else
1817 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load SSE-aligned 128-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1828 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1829#else
1830 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1831#endif
1832}
1833
1834
1835/**
1836 * Used by TB code to load 128-bit data w/ segmentation, skipping the alignment check.
1837 */
1838IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
1839{
1840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1841 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1842#else
1843 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
1844#endif
1845}
1846
1847
1848/**
1849 * Used by TB code to load 256-bit data w/ segmentation, skipping the alignment check.
1850 */
1851IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1852{
1853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1854 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1855#else
1856 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1857#endif
1858}
1859
1860
1861/**
1862 * Used by TB code to load AVX-aligned 256-bit data w/ segmentation.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
1865{
1866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1867 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1868#else
1869 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
1870#endif
1871}
1872#endif
1873
1874
1875/**
1876 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1877 */
1878IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1879{
1880#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1881 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1882#else
1883 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1884#endif
1885}
1886
1887
1888/**
1889 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1894 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1895#else
1896 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1897#endif
1898}
1899
1900
1901/**
1902 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1907 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1908#else
1909 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1916 */
1917IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1918{
1919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1920 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1921#else
1922 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1923#endif
1924}
1925
1926
1927#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1928/**
1929 * Used by TB code to store SSE-aligned unsigned 128-bit data w/ segmentation.
1930 */
1931IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
1932{
1933#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1934 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
1935#else
1936 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
1937#endif
1938}
1939
1940
1941/**
1942 * Used by TB code to store unsigned 128-bit data w/ segmentation, skipping the alignment check.
1943 */
1944IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
1945{
1946#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1947 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
1948#else
1949 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
1950#endif
1951}
1952
1953
1954/**
1955 * Used by TB code to store unsigned 256-bit data w/ segmentation, skipping the alignment check.
1956 */
1957IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
1958{
1959#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1960 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
1961#else
1962 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
1963#endif
1964}
1965
1966
1967/**
1968 * Used by TB code to store AVX-aligned unsigned 256-bit data w/ segmentation.
1969 */
1970IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
1971{
1972#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1973 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
1974#else
1975 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
1976#endif
1977}
1978#endif
1979
1980
1981
1982/**
1983 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1984 */
1985IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1986{
1987#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1988 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1989#else
1990 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1991#endif
1992}
1993
1994
1995/**
1996 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1997 */
1998IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1999{
2000#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2001 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2002#else
2003 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2004#endif
2005}
2006
2007
2008/**
2009 * Used by TB code to store a 32-bit selector value onto a generic stack.
2010 *
2011 * Intel CPUs don't write the whole dword, hence the special function.
2012 */
2013IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2014{
2015#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2016 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2017#else
2018 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2019#endif
2020}
2021
2022
2023/**
2024 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
2025 */
2026IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2027{
2028#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2029 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2030#else
2031 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2032#endif
2033}
2034
2035
2036/**
2037 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
2038 */
2039IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2040{
2041#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2042 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2043#else
2044 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
2045#endif
2046}
2047
2048
2049/**
2050 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
2051 */
2052IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2053{
2054#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2055 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2056#else
2057 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
2058#endif
2059}
2060
2061
2062/**
2063 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
2064 */
2065IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2066{
2067#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2068 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2069#else
2070 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
2071#endif
2072}
2073
2074
2075
2076/*********************************************************************************************************************************
2077* Helpers: Flat memory fetches and stores. *
2078*********************************************************************************************************************************/
2079
2080/**
2081 * Used by TB code to load unsigned 8-bit data w/ flat address.
2082 * @note Zero extending the value to 64-bit to simplify assembly.
2083 */
2084IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2085{
2086#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2087 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2088#else
2089 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2090#endif
2091}
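/* Note on the flat helpers in this section (hedged, derived from the parameter usage): when
   the inline TLB lookup is enabled they reuse the segmented *SafeJmp workers and pass
   UINT8_MAX as the segment register index, which by convention appears to mean "no segment /
   flat addressing"; without the inline lookup they call the dedicated iemMemFlat* workers
   instead. */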
2092
2093
2094/**
2095 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2096 * to 16 bits.
2097 * @note Zero extending the value to 64-bit to simplify assembly.
2098 */
2099IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2100{
2101#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2102 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2103#else
2104 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2105#endif
2106}
2107
2108
2109/**
2110 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2111 * to 32 bits.
2112 * @note Zero extending the value to 64-bit to simplify assembly.
2113 */
2114IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2115{
2116#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2117 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2118#else
2119 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2120#endif
2121}
2122
2123
2124/**
2125 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2126 * to 64 bits.
2127 */
2128IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2129{
2130#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2131 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2132#else
2133 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2134#endif
2135}
2136
2137
2138/**
2139 * Used by TB code to load unsigned 16-bit data w/ flat address.
2140 * @note Zero extending the value to 64-bit to simplify assembly.
2141 */
2142IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2143{
2144#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2145 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2146#else
2147 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2148#endif
2149}
2150
2151
2152/**
2153 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2154 * to 32 bits.
2155 * @note Zero extending the value to 64-bit to simplify assembly.
2156 */
2157IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2158{
2159#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2160 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2161#else
2162 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2163#endif
2164}
2165
2166
2167/**
2168 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2169 * to 64 bits.
2171 */
2172IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2173{
2174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2175 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2176#else
2177 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2178#endif
2179}
2180
2181
2182/**
2183 * Used by TB code to load unsigned 32-bit data w/ flat address.
2184 * @note Zero extending the value to 64-bit to simplify assembly.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2189 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2190#else
2191 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2198 * to 64 bits.
2200 */
2201IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2202{
2203#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2204 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2205#else
2206 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2207#endif
2208}
2209
2210
2211/**
2212 * Used by TB code to load unsigned 64-bit data w/ flat address.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2217 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2218#else
2219 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2220#endif
2221}
2222
2223
2224#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2225/**
2226 * Used by TB code to load unsigned 128-bit data w/ flat address.
2227 */
2228IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2229{
2230#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2231 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2232#else
2233 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2234#endif
2235}
2236
2237
2238/**
2239 * Used by TB code to load SSE-aligned unsigned 128-bit data w/ flat address.
2240 */
2241IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2242{
2243#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2244 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2245#else
2246 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2247#endif
2248}
2249
2250
2251/**
2252 * Used by TB code to load unsigned 128-bit data w/ flat address, skipping the alignment check.
2253 */
2254IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
2255{
2256#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2257 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2258#else
2259 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
2260#endif
2261}
2262
2263
2264/**
2265 * Used by TB code to load unsigned 256-bit data w/ flat address, skipping the alignment check.
2266 */
2267IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2268{
2269#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2270 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2271#else
2272 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2273#endif
2274}
2275
2276
2277/**
2278 * Used by TB code to load AVX-aligned unsigned 256-bit data w/ flat address.
2279 */
2280IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
2281{
2282#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2283 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2284#else
2285 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
2286#endif
2287}
2288#endif
2289
2290
2291/**
2292 * Used by TB code to store unsigned 8-bit data w/ flat address.
2293 */
2294IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2295{
2296#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2297 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2298#else
2299 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2300#endif
2301}
2302
2303
2304/**
2305 * Used by TB code to store unsigned 16-bit data w/ flat address.
2306 */
2307IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2308{
2309#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2310 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2311#else
2312 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2313#endif
2314}
2315
2316
2317/**
2318 * Used by TB code to store unsigned 32-bit data w/ flat address.
2319 */
2320IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2321{
2322#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2323 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2324#else
2325 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2326#endif
2327}
2328
2329
2330/**
2331 * Used by TB code to store unsigned 64-bit data w/ flat address.
2332 */
2333IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2334{
2335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2336 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2337#else
2338 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2339#endif
2340}
2341
2342
2343#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2344/**
2345 * Used by TB code to store SSE-aligned unsigned 128-bit data w/ flat address.
2346 */
2347IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2348{
2349#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2350 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2351#else
2352 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
2353#endif
2354}
2355
2356
2357/**
2358 * Used by TB code to store unsigned 128-bit data w/ flat address, skipping the alignment check.
2359 */
2360IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
2361{
2362#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2363 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
2364#else
2365 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
2366#endif
2367}
2368
2369
2370/**
2371 * Used by TB code to store unsigned 256-bit data w/ flat address, skipping the alignment check.
2372 */
2373IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2374{
2375#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2376 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2377#else
2378 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
2379#endif
2380}
2381
2382
2383/**
2384 * Used by TB code to store AVX-aligned unsigned 256-bit data w/ flat address.
2385 */
2386IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
2387{
2388#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2389 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
2390#else
2391 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
2392#endif
2393}
2394#endif
2395
2396
2397
2398/**
2399 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2400 */
2401IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2402{
2403#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2404 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2405#else
2406 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2407#endif
2408}
2409
2410
2411/**
2412 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2413 */
2414IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2417 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2418#else
2419 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to store a segment selector value onto a flat stack.
2426 *
2427 * Intel CPUs don't write the whole dword, hence the special function.
2428 */
2429IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2430{
2431#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2432 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2433#else
2434 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2435#endif
2436}
2437
2438
2439/**
2440 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2441 */
2442IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2445 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2446#else
2447 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2454 */
2455IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2456{
2457#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2458 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2459#else
2460 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2461#endif
2462}
2463
2464
2465/**
2466 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2467 */
2468IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2469{
2470#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2471 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2472#else
2473 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2474#endif
2475}
2476
2477
2478/**
2479 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2480 */
2481IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2482{
2483#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2484 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2485#else
2486 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2487#endif
2488}
2489
2490
2491
2492/*********************************************************************************************************************************
2493* Helpers: Segmented memory mapping. *
2494*********************************************************************************************************************************/
2495
2496/**
2497 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2498 * segmentation.
2499 */
2500IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2501 RTGCPTR GCPtrMem, uint8_t iSegReg))
2502{
2503#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2504 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#else
2506 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2507#endif
2508}
2509
2510
2511/**
2512 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2513 */
2514IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2515 RTGCPTR GCPtrMem, uint8_t iSegReg))
2516{
2517#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2518 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2519#else
2520 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2521#endif
2522}
2523
2524
2525/**
2526 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2527 */
2528IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2529 RTGCPTR GCPtrMem, uint8_t iSegReg))
2530{
2531#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2532 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2533#else
2534 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2535#endif
2536}
2537
2538
2539/**
2540 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2541 */
2542IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2543 RTGCPTR GCPtrMem, uint8_t iSegReg))
2544{
2545#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2546 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2547#else
2548 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2549#endif
2550}
2551
2552
2553/**
2554 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2555 * segmentation.
2556 */
2557IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2558 RTGCPTR GCPtrMem, uint8_t iSegReg))
2559{
2560#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2561 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#else
2563 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2564#endif
2565}
2566
2567
2568/**
2569 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2570 */
2571IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2572 RTGCPTR GCPtrMem, uint8_t iSegReg))
2573{
2574#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2575 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2576#else
2577 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2578#endif
2579}
2580
2581
2582/**
2583 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2584 */
2585IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2586 RTGCPTR GCPtrMem, uint8_t iSegReg))
2587{
2588#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2589 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2590#else
2591 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2592#endif
2593}
2594
2595
2596/**
2597 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2598 */
2599IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2600 RTGCPTR GCPtrMem, uint8_t iSegReg))
2601{
2602#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2603 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2604#else
2605 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2606#endif
2607}
2608
2609
2610/**
2611 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2612 * segmentation.
2613 */
2614IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2615 RTGCPTR GCPtrMem, uint8_t iSegReg))
2616{
2617#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2618 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2619#else
2620 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2621#endif
2622}
2623
2624
2625/**
2626 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2627 */
2628IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2629 RTGCPTR GCPtrMem, uint8_t iSegReg))
2630{
2631#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2632 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2633#else
2634 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2635#endif
2636}
2637
2638
2639/**
2640 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2641 */
2642IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2643 RTGCPTR GCPtrMem, uint8_t iSegReg))
2644{
2645#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2646 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2647#else
2648 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2649#endif
2650}
2651
2652
2653/**
2654 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2655 */
2656IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2657 RTGCPTR GCPtrMem, uint8_t iSegReg))
2658{
2659#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2660 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2661#else
2662 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2663#endif
2664}
2665
2666
2667/**
2668 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2669 * segmentation.
2670 */
2671IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2672 RTGCPTR GCPtrMem, uint8_t iSegReg))
2673{
2674#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2675 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2676#else
2677 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2678#endif
2679}
2680
2681
2682/**
2683 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2684 */
2685IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2686 RTGCPTR GCPtrMem, uint8_t iSegReg))
2687{
2688#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2689 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2690#else
2691 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2692#endif
2693}
2694
2695
2696/**
2697 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2698 */
2699IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2700 RTGCPTR GCPtrMem, uint8_t iSegReg))
2701{
2702#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2703 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2704#else
2705 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2706#endif
2707}
2708
2709
2710/**
2711 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2712 */
2713IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2714 RTGCPTR GCPtrMem, uint8_t iSegReg))
2715{
2716#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2717 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2718#else
2719 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2720#endif
2721}
2722
2723
2724/**
2725 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2726 */
2727IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2728 RTGCPTR GCPtrMem, uint8_t iSegReg))
2729{
2730#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2731 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2732#else
2733 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2734#endif
2735}
2736
2737
2738/**
2739 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2740 */
2741IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2742 RTGCPTR GCPtrMem, uint8_t iSegReg))
2743{
2744#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2745 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2746#else
2747 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2748#endif
2749}
2750
2751
2752/**
2753 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2754 * segmentation.
2755 */
2756IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2757 RTGCPTR GCPtrMem, uint8_t iSegReg))
2758{
2759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2760 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2761#else
2762 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2763#endif
2764}
2765
2766
2767/**
2768 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2769 */
2770IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2771 RTGCPTR GCPtrMem, uint8_t iSegReg))
2772{
2773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2774 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2775#else
2776 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2777#endif
2778}
2779
2780
2781/**
2782 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2783 */
2784IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2785 RTGCPTR GCPtrMem, uint8_t iSegReg))
2786{
2787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2788 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2789#else
2790 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2791#endif
2792}
2793
2794
2795/**
2796 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2797 */
2798IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2799 RTGCPTR GCPtrMem, uint8_t iSegReg))
2800{
2801#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2802 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2803#else
2804 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2805#endif
2806}
2807
2808
2809/*********************************************************************************************************************************
2810* Helpers: Flat memory mapping. *
2811*********************************************************************************************************************************/
2812
2813/**
2814 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2815 * address.
2816 */
2817IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2818{
2819#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2820 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2821#else
2822 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2823#endif
2824}
2825
2826
2827/**
2828 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2829 */
2830IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2831{
2832#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2833 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2834#else
2835 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2836#endif
2837}
2838
2839
2840/**
2841 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2842 */
2843IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2844{
2845#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2846 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2847#else
2848 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2849#endif
2850}
2851
2852
2853/**
2854 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2855 */
2856IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2857{
2858#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2859 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2860#else
2861 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2862#endif
2863}
2864
2865
2866/**
2867 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2868 * address.
2869 */
2870IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2871{
2872#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2873 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2874#else
2875 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2876#endif
2877}
2878
2879
2880/**
2881 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2882 */
2883IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2884{
2885#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2886 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2887#else
2888 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2889#endif
2890}
2891
2892
2893/**
2894 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2895 */
2896IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2897{
2898#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2899 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2900#else
2901 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2902#endif
2903}
2904
2905
2906/**
2907 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2908 */
2909IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2910{
2911#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2912 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2913#else
2914 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2915#endif
2916}
2917
2918
2919/**
2920 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2921 * address.
2922 */
2923IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2924{
2925#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2926 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2927#else
2928 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2929#endif
2930}
2931
2932
2933/**
2934 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2935 */
2936IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2937{
2938#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2939 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2940#else
2941 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2942#endif
2943}
2944
2945
2946/**
2947 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2948 */
2949IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2950{
2951#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2952 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2953#else
2954 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2955#endif
2956}
2957
2958
2959/**
2960 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2961 */
2962IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2963{
2964#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2965 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2966#else
2967 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2968#endif
2969}
2970
2971
2972/**
2973 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2974 * address.
2975 */
2976IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2977{
2978#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2979 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2980#else
2981 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2982#endif
2983}
2984
2985
2986/**
2987 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2988 */
2989IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2990{
2991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2992 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2993#else
2994 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2995#endif
2996}
2997
2998
2999/**
3000 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
3001 */
3002IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3003{
3004#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3005 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3006#else
3007 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3008#endif
3009}
3010
3011
3012/**
3013 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
3014 */
3015IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3016{
3017#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3018 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3019#else
3020 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3021#endif
3022}
3023
3024
3025/**
3026 * Used by TB code to map 80-bit float data writeonly w/ flat address.
3027 */
3028IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3029{
3030#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3031 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3032#else
3033 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3034#endif
3035}
3036
3037
3038/**
3039 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
3040 */
3041IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3042{
3043#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3044 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3045#else
3046 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3047#endif
3048}
3049
3050
3051/**
3052 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
3053 * address.
3054 */
3055IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3056{
3057#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3058 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3059#else
3060 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3061#endif
3062}
3063
3064
3065/**
3066 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
3067 */
3068IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3069{
3070#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3071 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3072#else
3073 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3074#endif
3075}
3076
3077
3078/**
3079 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
3080 */
3081IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3082{
3083#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3084 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3085#else
3086 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3087#endif
3088}
3089
3090
3091/**
3092 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
3093 */
3094IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
3095{
3096#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
3097 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
3098#else
3099 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
3100#endif
3101}
3102
3103
3104/*********************************************************************************************************************************
3105* Helpers: Commit, rollback & unmap *
3106*********************************************************************************************************************************/
3107
3108/**
3109 * Used by TB code to commit and unmap an atomic read-write memory mapping.
3110 */
3111IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3112{
3113 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
3114}
3115
3116
3117/**
3118 * Used by TB code to commit and unmap a read-write memory mapping.
3119 */
3120IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3121{
3122 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
3123}
3124
3125
3126/**
3127 * Used by TB code to commit and unmap a write-only memory mapping.
3128 */
3129IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3130{
3131 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
3132}
3133
3134
3135/**
3136 * Used by TB code to commit and unmap a read-only memory mapping.
3137 */
3138IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
3139{
3140 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
3141}
3142
3143
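/*
 * Illustrative sketch (not part of the build): recompiled TB code is expected
 * to pair each of the map helpers above with the matching commit-and-unmap
 * helper, carrying the bUnmapInfo cookie in between.  For a flat 128-bit
 * store the emitted calls boil down to something like this (uValueLo and
 * uValueHi are hypothetical):
 *
 *      uint8_t     bUnmapInfo;
 *      RTUINT128U *pu128Dst = iemNativeHlpMemFlatMapDataU128Wo(pVCpu, &bUnmapInfo, GCPtrMem);
 *      pu128Dst->s.Lo = uValueLo;
 *      pu128Dst->s.Hi = uValueHi;
 *      iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo);
 */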
3144/**
3145 * Reinitializes the native recompiler state.
3146 *
3147 * Called before starting a new recompile job.
3148 */
3149static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
3150{
3151 pReNative->cLabels = 0;
3152 pReNative->bmLabelTypes = 0;
3153 pReNative->cFixups = 0;
3154#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3155 pReNative->pDbgInfo->cEntries = 0;
3156 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
3157#endif
3158 pReNative->pTbOrg = pTb;
3159 pReNative->cCondDepth = 0;
3160 pReNative->uCondSeqNo = 0;
3161 pReNative->uCheckIrqSeqNo = 0;
3162 pReNative->uTlbSeqNo = 0;
3163
3164#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3165 pReNative->Core.offPc = 0;
3166 pReNative->Core.cInstrPcUpdateSkipped = 0;
3167#endif
3168#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3169 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3170#endif
3171 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
3172#if IEMNATIVE_HST_GREG_COUNT < 32
3173 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
3174#endif
3175 ;
3176 pReNative->Core.bmHstRegsWithGstShadow = 0;
3177 pReNative->Core.bmGstRegShadows = 0;
3178#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3179 pReNative->Core.bmGstRegShadowDirty = 0;
3180#endif
3181 pReNative->Core.bmVars = 0;
3182 pReNative->Core.bmStack = 0;
3183 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
3184 pReNative->Core.u64ArgVars = UINT64_MAX;
3185
3186 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 17);
3187 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
3188 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
3189 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
3190 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
3191 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
3192 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
3193 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
3194 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
3195 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
3196 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
3197 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
3198 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
3199 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
3200 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
3201 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
3202 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
3203 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
3204
3205 /* Full host register reinit: */
3206 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
3207 {
3208 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
3209 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
3210 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
3211 }
3212
3213 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
3214 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
3215#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3216 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
3217#endif
3218#ifdef IEMNATIVE_REG_FIXED_TMP0
3219 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
3220#endif
3221#ifdef IEMNATIVE_REG_FIXED_TMP1
3222 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
3223#endif
3224#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3225 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3226#endif
3227 );
3228 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3229 {
3230 fRegs &= ~RT_BIT_32(idxReg);
3231 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3232 }
3233
3234 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3235#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3236 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3237#endif
3238#ifdef IEMNATIVE_REG_FIXED_TMP0
3239 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3240#endif
3241#ifdef IEMNATIVE_REG_FIXED_TMP1
3242 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3243#endif
3244#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3245 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3246#endif
3247
3248#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3249 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3250# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3251 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3252# endif
3253 ;
3254 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3255 pReNative->Core.bmGstSimdRegShadows = 0;
3256 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3257 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3258
3259 /* Full host register reinit: */
3260 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3261 {
3262 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3263 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3264 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
3265 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3266 }
3267
3268 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
3269 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3270 {
3271 fRegs &= ~RT_BIT_32(idxReg);
3272 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3273 }
3274
3275#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3276 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3277#endif
3278
3279#endif
3280
3281 return pReNative;
3282}
3283
3284
3285/**
3286 * Allocates and initializes the native recompiler state.
3287 *
3288 * This is called the first time an EMT wants to recompile something.
3289 *
3290 * @returns Pointer to the new recompiler state.
3291 * @param pVCpu The cross context virtual CPU structure of the calling
3292 * thread.
3293 * @param pTb The TB that's about to be recompiled.
3294 * @thread EMT(pVCpu)
3295 */
3296static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3297{
3298 VMCPU_ASSERT_EMT(pVCpu);
3299
3300 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3301 AssertReturn(pReNative, NULL);
3302
3303 /*
3304 * Try allocate all the buffers and stuff we need.
3305 */
3306 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3307 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3308 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3309#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3310 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3311#endif
3312 if (RT_LIKELY( pReNative->pInstrBuf
3313 && pReNative->paLabels
3314 && pReNative->paFixups)
3315#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3316 && pReNative->pDbgInfo
3317#endif
3318 )
3319 {
3320 /*
3321 * Set the buffer & array sizes on success.
3322 */
3323 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3324 pReNative->cLabelsAlloc = _8K;
3325 pReNative->cFixupsAlloc = _16K;
3326#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3327 pReNative->cDbgInfoAlloc = _16K;
3328#endif
3329
3330 /* Other constant stuff: */
3331 pReNative->pVCpu = pVCpu;
3332
3333 /*
3334 * Done, just need to save it and reinit it.
3335 */
3336 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3337 return iemNativeReInit(pReNative, pTb);
3338 }
3339
3340 /*
3341 * Failed. Cleanup and return.
3342 */
3343 AssertFailed();
3344 RTMemFree(pReNative->pInstrBuf);
3345 RTMemFree(pReNative->paLabels);
3346 RTMemFree(pReNative->paFixups);
3347#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3348 RTMemFree(pReNative->pDbgInfo);
3349#endif
3350 RTMemFree(pReNative);
3351 return NULL;
3352}
3353
3354
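/*
 * Minimal usage sketch (assumption about the caller, not part of the build):
 * the recompiler entry point reuses the per-EMT state when it already exists
 * and only takes the allocating path on first use:
 *
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      if (RT_LIKELY(pReNative))
 *          pReNative = iemNativeReInit(pReNative, pTb);
 *      else
 *      {
 *          pReNative = iemNativeInit(pVCpu, pTb);
 *          AssertReturn(pReNative, pTb);
 *      }
 */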
3355/**
3356 * Creates a label
3357 *
3358 * If the label does not yet have a defined position,
3359 * call iemNativeLabelDefine() later to set it.
3360 *
3361 * @returns Label ID. Throws VBox status code on failure, so no need to check
3362 * the return value.
3363 * @param pReNative The native recompile state.
3364 * @param enmType The label type.
3365 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3366 * label is not yet defined (default).
3367 * @param uData Data associated with the label. Only applicable to
3368 * certain types of labels. Default is zero.
3369 */
3370DECL_HIDDEN_THROW(uint32_t)
3371iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3372 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3373{
3374 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3375
3376 /*
3377 * Locate existing label definition.
3378 *
3379 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3380 * and uData is zero.
3381 */
3382 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3383 uint32_t const cLabels = pReNative->cLabels;
3384 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3385#ifndef VBOX_STRICT
3386 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3387 && offWhere == UINT32_MAX
3388 && uData == 0
3389#endif
3390 )
3391 {
3392#ifndef VBOX_STRICT
3393 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3394 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3395 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3396 if (idxLabel < pReNative->cLabels)
3397 return idxLabel;
3398#else
3399 for (uint32_t i = 0; i < cLabels; i++)
3400 if ( paLabels[i].enmType == enmType
3401 && paLabels[i].uData == uData)
3402 {
3403 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3404 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3405 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3406 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3407 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3408 return i;
3409 }
3410 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3411 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3412#endif
3413 }
3414
3415 /*
3416 * Make sure we've got room for another label.
3417 */
3418 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3419 { /* likely */ }
3420 else
3421 {
3422 uint32_t cNew = pReNative->cLabelsAlloc;
3423 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3424 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3425 cNew *= 2;
3426 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3427 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3428 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3429 pReNative->paLabels = paLabels;
3430 pReNative->cLabelsAlloc = cNew;
3431 }
3432
3433 /*
3434 * Define a new label.
3435 */
3436 paLabels[cLabels].off = offWhere;
3437 paLabels[cLabels].enmType = enmType;
3438 paLabels[cLabels].uData = uData;
3439 pReNative->cLabels = cLabels + 1;
3440
3441 Assert((unsigned)enmType < 64);
3442 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3443
3444 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3445 {
3446 Assert(uData == 0);
3447 pReNative->aidxUniqueLabels[enmType] = cLabels;
3448 }
3449
3450 if (offWhere != UINT32_MAX)
3451 {
3452#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3453 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3454 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3455#endif
3456 }
3457 return cLabels;
3458}
3459
3460
3461/**
3462 * Defines the location of an existing label.
3463 *
3464 * @param pReNative The native recompile state.
3465 * @param idxLabel The label to define.
3466 * @param offWhere The position.
3467 */
3468DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3469{
3470 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3471 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3472 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3473 pLabel->off = offWhere;
3474#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3475 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3476 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3477#endif
3478}
3479
3480
3481/**
3482 * Looks up a label.
3483 *
3484 * @returns Label ID if found, UINT32_MAX if not.
3485 */
3486static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3487 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3488{
3489 Assert((unsigned)enmType < 64);
3490 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3491 {
3492 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3493 return pReNative->aidxUniqueLabels[enmType];
3494
3495 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3496 uint32_t const cLabels = pReNative->cLabels;
3497 for (uint32_t i = 0; i < cLabels; i++)
3498 if ( paLabels[i].enmType == enmType
3499 && paLabels[i].uData == uData
3500 && ( paLabels[i].off == offWhere
3501 || offWhere == UINT32_MAX
3502 || paLabels[i].off == UINT32_MAX))
3503 return i;
3504 }
3505 return UINT32_MAX;
3506}
3507
3508
3509/**
3510 * Adds a fixup.
3511 *
3512 * @throws VBox status code (int) on failure.
3513 * @param pReNative The native recompile state.
3514 * @param offWhere The instruction offset of the fixup location.
3515 * @param idxLabel The target label ID for the fixup.
3516 * @param enmType The fixup type.
3517 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3518 */
3519DECL_HIDDEN_THROW(void)
3520iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3521 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3522{
3523 Assert(idxLabel <= UINT16_MAX);
3524 Assert((unsigned)enmType <= UINT8_MAX);
3525#ifdef RT_ARCH_ARM64
3526 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
3527 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
3528 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
3529#endif
3530
3531 /*
3532 * Make sure we've room.
3533 */
3534 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3535 uint32_t const cFixups = pReNative->cFixups;
3536 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3537 { /* likely */ }
3538 else
3539 {
3540 uint32_t cNew = pReNative->cFixupsAlloc;
3541 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3542 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3543 cNew *= 2;
3544 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3545 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3546 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3547 pReNative->paFixups = paFixups;
3548 pReNative->cFixupsAlloc = cNew;
3549 }
3550
3551 /*
3552 * Add the fixup.
3553 */
3554 paFixups[cFixups].off = offWhere;
3555 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3556 paFixups[cFixups].enmType = enmType;
3557 paFixups[cFixups].offAddend = offAddend;
3558 pReNative->cFixups = cFixups + 1;
3559}
3560
3561
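/*
 * Illustrative sketch (not part of the build): a forward branch is typically
 * emitted by creating an undefined label, recording a fixup at the branch
 * instruction, and defining the label once the target offset is known.  The
 * label type and the Rel32 fixup type below merely stand in for whatever the
 * concrete emitter uses:
 *
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType);    // offWhere=UINT32_MAX: forward decl
 *      ... emit the branch instruction at offBranch ...
 *      iemNativeAddFixup(pReNative, offBranch, idxLabel, kIemNativeFixupType_Rel32);
 *      ... emit the code being branched over ...
 *      iemNativeLabelDefine(pReNative, idxLabel, off);     // the final pass patches the fixup with the real distance
 */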
3562/**
3563 * Slow code path for iemNativeInstrBufEnsure.
3564 */
3565DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3566{
3567 /* Double the buffer size till we meet the request. */
3568 uint32_t cNew = pReNative->cInstrBufAlloc;
3569 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3570 do
3571 cNew *= 2;
3572 while (cNew < off + cInstrReq);
3573
3574 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3575#ifdef RT_ARCH_ARM64
3576 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3577#else
3578 uint32_t const cbMaxInstrBuf = _2M;
3579#endif
3580 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3581
3582 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3583 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3584
3585#ifdef VBOX_STRICT
3586 pReNative->offInstrBufChecked = off + cInstrReq;
3587#endif
3588 pReNative->cInstrBufAlloc = cNew;
3589 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3590}
3591
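/*
 * Usage sketch (assumption, not part of the build): emitters call the inlined
 * iemNativeInstrBufEnsure() wrapper before writing instructions, so only the
 * unlikely out-of-space case lands in the slow path above:
 *
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4); // room for 4 instructions
 *      pCodeBuf[off++] = ...;
 */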
3592#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3593
3594/**
3595 * Grows the static debug info array used during recompilation.
3596 *
3597 * @returns Pointer to the new debug info block; throws VBox status code on
3598 * failure, so no need to check the return value.
3599 */
3600DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3601{
3602 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3603 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3604 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3605 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3606 pReNative->pDbgInfo = pDbgInfo;
3607 pReNative->cDbgInfoAlloc = cNew;
3608 return pDbgInfo;
3609}
3610
3611
3612/**
3613 * Adds a new, uninitialized debug info entry, returning a pointer to it.
3614 */
3615DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3616{
3617 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3618 { /* likely */ }
3619 else
3620 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3621 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3622}
3623
3624
3625/**
3626 * Debug Info: Adds a native offset record, if necessary.
3627 */
3628DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3629{
3630 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3631
3632 /*
3633 * Do we need this one?
3634 */
3635 uint32_t const offPrev = pDbgInfo->offNativeLast;
3636 if (offPrev == off)
3637 return;
3638 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3639
3640 /*
3641 * Add it.
3642 */
3643 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3644 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3645 pEntry->NativeOffset.offNative = off;
3646 pDbgInfo->offNativeLast = off;
3647}
3648
3649
3650/**
3651 * Debug Info: Record info about a label.
3652 */
3653static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3654{
3655 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3656 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3657 pEntry->Label.uUnused = 0;
3658 pEntry->Label.enmLabel = (uint8_t)enmType;
3659 pEntry->Label.uData = uData;
3660}
3661
3662
3663/**
3664 * Debug Info: Record info about a threaded call.
3665 */
3666static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3667{
3668 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3669 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3670 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3671 pEntry->ThreadedCall.uUnused = 0;
3672 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3673}
3674
3675
3676/**
3677 * Debug Info: Record info about a new guest instruction.
3678 */
3679static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3680{
3681 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3682 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3683 pEntry->GuestInstruction.uUnused = 0;
3684 pEntry->GuestInstruction.fExec = fExec;
3685}
3686
3687
3688/**
3689 * Debug Info: Record info about guest register shadowing.
3690 */
3691DECL_HIDDEN_THROW(void)
3692iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3693 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
3694{
3695 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3696 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3697 pEntry->GuestRegShadowing.uUnused = 0;
3698 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3699 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3700 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3701#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3702 Assert( idxHstReg != UINT8_MAX
3703 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
3704#endif
3705}
3706
3707
3708# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3709/**
3710 * Debug Info: Record info about guest SIMD register shadowing.
3711 */
3712DECL_HIDDEN_THROW(void)
3713iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3714 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
3715{
3716 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3717 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3718 pEntry->GuestSimdRegShadowing.uUnused = 0;
3719 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3720 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3721 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3722}
3723# endif
3724
3725
3726# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3727/**
3728 * Debug Info: Record info about delayed RIP updates.
3729 */
3730DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3731{
3732 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3733 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3734 pEntry->DelayedPcUpdate.offPc = offPc;
3735 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3736}
3737# endif
3738
3739# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
3740
3741/**
3742 * Debug Info: Record info about a dirty guest register.
3743 */
3744DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
3745 uint8_t idxGstReg, uint8_t idxHstReg)
3746{
3747 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3748 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
3749 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
3750 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
3751 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
3752}
3753
3754
3755/**
3756 * Debug Info: Record info about a dirty guest register writeback operation.
3757 */
3758DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
3759{
3760 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3761 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
3762 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
3763 pEntry->GuestRegWriteback.fGstReg = (uint32_t)fGstReg;
3764 /** @todo r=aeichner Can't fit the whole register mask in the debug info entry, deal with it when it becomes necessary. */
3765 Assert((uint64_t)pEntry->GuestRegWriteback.fGstReg == fGstReg);
3766}
3767
3768# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
3769
3770#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3771
3772
3773/*********************************************************************************************************************************
3774* Register Allocator *
3775*********************************************************************************************************************************/
3776
3777/**
3778 * Register parameter indexes (indexed by argument number).
3779 */
3780DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3781{
3782 IEMNATIVE_CALL_ARG0_GREG,
3783 IEMNATIVE_CALL_ARG1_GREG,
3784 IEMNATIVE_CALL_ARG2_GREG,
3785 IEMNATIVE_CALL_ARG3_GREG,
3786#if defined(IEMNATIVE_CALL_ARG4_GREG)
3787 IEMNATIVE_CALL_ARG4_GREG,
3788# if defined(IEMNATIVE_CALL_ARG5_GREG)
3789 IEMNATIVE_CALL_ARG5_GREG,
3790# if defined(IEMNATIVE_CALL_ARG6_GREG)
3791 IEMNATIVE_CALL_ARG6_GREG,
3792# if defined(IEMNATIVE_CALL_ARG7_GREG)
3793 IEMNATIVE_CALL_ARG7_GREG,
3794# endif
3795# endif
3796# endif
3797#endif
3798};
3799AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3800
3801/**
3802 * Call register masks indexed by argument count.
3803 */
3804DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3805{
3806 0,
3807 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3808 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3809 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3810 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3811 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3812#if defined(IEMNATIVE_CALL_ARG4_GREG)
3813 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3814 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3815# if defined(IEMNATIVE_CALL_ARG5_GREG)
3816 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3817 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3818# if defined(IEMNATIVE_CALL_ARG6_GREG)
3819 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3820 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3821 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3822# if defined(IEMNATIVE_CALL_ARG7_GREG)
3823 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3824 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3825 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3826# endif
3827# endif
3828# endif
3829#endif
3830};
3831
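/*
 * Illustrative sketch (not part of the build): the two tables above are meant
 * to be used together when setting up a helper call; idxRegSrc is hypothetical:
 *
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, g_aidxIemNativeCallRegs[1], idxRegSrc); // load argument #1
 *      uint32_t const fArgRegs = g_afIemNativeCallRegs[3];   // mask of ARG0..ARG2 that must be freed up first
 */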
3832#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3833/**
3834 * BP offset of the stack argument slots.
3835 *
3836 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3837 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3838 */
3839DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3840{
3841 IEMNATIVE_FP_OFF_STACK_ARG0,
3842# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3843 IEMNATIVE_FP_OFF_STACK_ARG1,
3844# endif
3845# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3846 IEMNATIVE_FP_OFF_STACK_ARG2,
3847# endif
3848# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3849 IEMNATIVE_FP_OFF_STACK_ARG3,
3850# endif
3851};
3852AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3853#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3854
3855/**
3856 * Info about shadowed guest register values.
3857 * @see IEMNATIVEGSTREG
3858 */
3859DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
3860{
3861#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3862 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3863 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3864 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3865 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3866 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3867 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3868 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3869 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3870 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3871 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3872 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3873 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3874 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3875 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3876 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3877 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3878 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3879 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3880 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3881 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3882 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3883 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3884 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3885 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3886 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3887 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3888 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3889 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3890 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3891 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3892 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3893 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3894 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3895 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3896 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3897 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3898 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3899 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3900 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3901 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3902 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3903 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3904 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3905 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3906 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3907 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3908 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3909 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3910#undef CPUMCTX_OFF_AND_SIZE
3911};
3912AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3913
3914
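/*
 * Illustrative sketch (not part of the build): the table is indexed by
 * IEMNATIVEGSTREG and supplies the CPUMCTX offset and access size used by the
 * guest register load/store emitters, e.g. for guest RAX:
 *
 *      uint32_t const offVCpu = g_aGstShadowInfo[kIemNativeGstReg_GprFirst + X86_GREG_xAX].off;
 *      unsigned const cbReg   = g_aGstShadowInfo[kIemNativeGstReg_GprFirst + X86_GREG_xAX].cb;
 */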
3915/** Host CPU general purpose register names. */
3916DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3917{
3918#ifdef RT_ARCH_AMD64
3919 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3920#elif defined(RT_ARCH_ARM64)
3921 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3922 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3923#else
3924# error "port me"
3925#endif
3926};
3927
3928
3929#if 0 /* unused */
3930/**
3931 * Tries to locate a suitable register in the given register mask.
3932 *
3933 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3934 * failed.
3935 *
3936 * @returns Host register number on success, returns UINT8_MAX on failure.
3937 */
3938static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3939{
3940 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3941 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3942 if (fRegs)
3943 {
3944 /** @todo pick better here: */
3945 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3946
3947 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3948 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3949 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3950 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3951
3952 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3953 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3954 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3955 return idxReg;
3956 }
3957 return UINT8_MAX;
3958}
3959#endif /* unused */
3960
3961
3962#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3963/**
3964 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
3965 *
3966 * @returns New code buffer offset on success, UINT32_MAX on failure.
3967 * @param pReNative The native recompile state.
3968 * @param off The current code buffer position.
3969 * @param enmGstReg The guest register to store to.
3970 * @param idxHstReg The host register to store from.
3971 */
3972DECL_FORCE_INLINE_THROW(uint32_t)
3973iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
3974{
3975 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3976 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3977
3978 switch (g_aGstShadowInfo[enmGstReg].cb)
3979 {
3980 case sizeof(uint64_t):
3981 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3982 case sizeof(uint32_t):
3983 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3984 case sizeof(uint16_t):
3985 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3986#if 0 /* not present in the table. */
3987 case sizeof(uint8_t):
3988 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3989#endif
3990 default:
3991 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3992 }
3993}
3994
3995
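/*
 * Example (assumption, not compiled): for kIemNativeGstReg_EFlags the table
 * entry is 4 bytes wide, so the switch above resolves to
 *      iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[kIemNativeGstReg_EFlags].off);
 */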
3996/**
3997 * Emits code to flush a pending write of the given guest register if any.
3998 *
3999 * @returns New code buffer offset.
4000 * @param pReNative The native recompile state.
4001 * @param off Current code buffer position.
4002 * @param enmGstReg The guest register to flush.
4003 */
4004DECL_HIDDEN_THROW(uint32_t)
4005iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
4006{
4007 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4008
4009 Assert(enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast);
4010 Assert( idxHstReg != UINT8_MAX
4011 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
4012 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
4013 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
4014
4015 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
4016
4017 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
4018 return off;
4019}
4020
4021
4022/**
4023 * Flush the given set of guest registers if marked as dirty.
4024 *
4025 * @returns New code buffer offset.
4026 * @param pReNative The native recompile state.
4027 * @param off Current code buffer position.
4028 * @param fFlushGstReg The guest register set to flush (default is flush everything).
4029 */
4030DECL_HIDDEN_THROW(uint32_t)
4031iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
4032{
4033 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
4034 if (bmGstRegShadowDirty)
4035 {
4036# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4037 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4038 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
4039# endif
4040 do
4041 {
4042 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
4043 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
4044 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
4045 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4046 } while (bmGstRegShadowDirty);
4047 }
4048
4049 return off;
4050}
4051
4052
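/*
 * Usage sketch (assumption, not part of the build): callers either flush all
 * dirty shadow copies before a helper call or restrict the mask to what the
 * callee may inspect:
 *
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off);                  // flush everything dirty
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off,
 *                                        RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)); // just RSP
 */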
4053/**
4054 * Flush all shadowed guest registers marked as dirty for the given host register.
4055 *
4056 * @returns New code buffer offset.
4057 * @param pReNative The native recompile state.
4058 * @param off Current code buffer position.
4059 * @param idxHstReg The host register.
4060 *
4061 * @note This doesn't do any unshadowing of guest registers from the host register.
4062 */
4063DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
4064{
4065 /* We need to flush any pending guest register writes this host register shadows. */
4066 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4067 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
4068 {
4069# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4070 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4071 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
4072# endif
4073 /** @todo r=bird: This is a crap way of enumerating a bitmask where we're
4074 * likely to only have a single bit set. It'll be in the 0..15 range,
4075 * but still it's 15 unnecessary loops for the last guest register. */
4076
4077 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
4078 do
4079 {
4080 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
4081 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
4082 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
4083 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4084 } while (bmGstRegShadowDirty);
4085 }
4086
4087 return off;
4088}
4089#endif
4090
4091
4092/**
4093 * Locate a register, possibly freeing one up.
4094 *
4095 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4096 * failed.
4097 *
4098 * @returns Host register number on success. Returns UINT8_MAX if no registers
4099 * found, the caller is supposed to deal with this and raise an
4100 * allocation type specific status code (if desired).
4101 *
4102 * @throws VBox status code if we run into trouble spilling a variable or
4103 * recording debug info. Does NOT throw anything if we're out of
4104 * registers, though.
4105 */
4106static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4107 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
4108{
4109 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
4110 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4111 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4112
4113 /*
4114 * Try a freed register that's shadowing a guest register.
4115 */
4116 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
4117 if (fRegs)
4118 {
4119 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
4120
4121#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4122 /*
4123 * When we have liveness information, we use it to kick out all shadowed
4124 * guest registers that will not be needed any more in this TB. If we're
4125 * lucky, this may prevent us from ending up here again.
4126 *
4127 * Note! We must consider the previous entry here so we don't free
4128 * anything that the current threaded function requires (current
4129 * entry is produced by the next threaded function).
4130 */
4131 uint32_t const idxCurCall = pReNative->idxCurCall;
4132 if (idxCurCall > 0)
4133 {
4134 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4135
4136# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4137 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4138 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4139 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4140# else
4141 /* Construct a mask of the registers not in the read or write state.
4142 Note! We could skip writes, if they aren't from us, as this is just
4143 a hack to prevent trashing registers that have just been written
4144 or will be written when we retire the current instruction. */
4145 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4146 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4147 & IEMLIVENESSBIT_MASK;
4148# endif
4149 /* Merge EFLAGS. */
4150 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
4151 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
4152 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
4153 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
4154 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
4155
4156 /* If it matches any shadowed registers. */
4157 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4158 {
4159#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4160 /* Writeback any dirty shadow registers we are about to unshadow. */
4161 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
4162#endif
4163
4164 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
4165 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4166 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4167
4168 /* See if we've got any unshadowed registers we can return now. */
4169 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4170 if (fUnshadowedRegs)
4171 {
4172 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
4173 return (fPreferVolatile
4174 ? ASMBitFirstSetU32(fUnshadowedRegs)
4175 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4176 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4177 - 1;
4178 }
4179 }
4180 }
4181#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4182
4183 unsigned const idxReg = (fPreferVolatile
4184 ? ASMBitFirstSetU32(fRegs)
4185 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4186 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
4187 - 1;
4188
4189 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4190 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4191 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4192 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4193
4194#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4195 /* We need to flush any pending guest register writes this host register shadows. */
4196 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
4197#endif
4198
4199 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4200 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4201 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4202 return idxReg;
4203 }
4204
4205 /*
4206 * Try free up a variable that's in a register.
4207 *
4208 * We do two rounds here, first evacuating variables that don't need to be
4209 * saved on the stack, then in the second round moving things to the stack.
4210 */
4211 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
4212 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
4213 {
4214 uint32_t fVars = pReNative->Core.bmVars;
4215 while (fVars)
4216 {
4217 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
4218 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
4219#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4220 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
4221 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Must clear the bit here, otherwise the loop never terminates. */
4222#endif
4223
4224 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
4225 && (RT_BIT_32(idxReg) & fRegMask)
4226 && ( iLoop == 0
4227 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
4228 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4229 && !pReNative->Core.aVars[idxVar].fRegAcquired)
4230 {
4231 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
4232 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4233 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4234 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4235 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4236 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4237#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4238 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4239#endif
4240
4241 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
4242 {
4243 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
4244 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
4245 }
4246
4247 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
4248 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
4249
4250 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4251 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4252 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4253 return idxReg;
4254 }
4255 fVars &= ~RT_BIT_32(idxVar);
4256 }
4257 }
4258
4259 return UINT8_MAX;
4260}
4261
4262
4263/**
4264 * Reassigns a variable to a different register specified by the caller.
4265 *
4266 * @returns The new code buffer position.
4267 * @param pReNative The native recompile state.
4268 * @param off The current code buffer position.
4269 * @param idxVar The variable index.
4270 * @param idxRegOld The old host register number.
4271 * @param idxRegNew The new host register number.
4272 * @param pszCaller The caller for logging.
4273 */
4274static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4275 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4276{
4277 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4278 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4279#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4280 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4281#endif
4282 RT_NOREF(pszCaller);
4283
4284#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4285 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4286#endif
4287 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
4288
4289 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4290#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4291 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4292#endif
4293 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4294 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
4295 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
4296
4297 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4298 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4299 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
4300 if (fGstRegShadows)
4301 {
4302 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4303 | RT_BIT_32(idxRegNew);
4304 while (fGstRegShadows)
4305 {
4306 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4307 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4308
4309 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
4310 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
4311 }
4312 }
4313
4314 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4315 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4316 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
4317 return off;
4318}
4319
4320
4321/**
4322 * Moves a variable to a different register or spills it onto the stack.
4323 *
4324 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4325 * kinds can easily be recreated if needed later.
4326 *
4327 * @returns The new code buffer position.
4328 * @param pReNative The native recompile state.
4329 * @param off The current code buffer position.
4330 * @param idxVar The variable index.
4331 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4332 * call-volatile registers.
4333 */
4334DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4335 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
4336{
4337 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4338 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4339 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4340 Assert(!pVar->fRegAcquired);
4341
4342 uint8_t const idxRegOld = pVar->idxReg;
4343 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4344 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
4345 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4346 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
4347 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
4348 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4349 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
4350 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
4351#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4352 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4353#endif
4354
4355
4356 /** @todo Add statistics on this.*/
4357 /** @todo Implement basic variable liveness analysis (python) so variables
4358 * can be freed immediately once no longer used. This has the potential to
4359 * be trashing registers and stack for dead variables.
4360 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4361
4362 /*
4363 * First try move it to a different register, as that's cheaper.
4364 */
4365 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4366 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
4367 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
4368 if (fRegs)
4369 {
4370 /* Avoid using shadow registers, if possible. */
4371 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
4372 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
4373 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4374 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
4375 }
4376
4377 /*
4378 * Otherwise we must spill the register onto the stack.
4379 */
4380 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4381 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4382 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4383 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4384
4385 pVar->idxReg = UINT8_MAX;
4386 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4387 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
4388 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
4389 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
4390 return off;
4391}
4392
4393
4394/**
4395 * Allocates a temporary host general purpose register.
4396 *
4397 * This may emit code to save register content onto the stack in order to free
4398 * up a register.
4399 *
4400 * @returns The host register number; throws VBox status code on failure,
4401 * so no need to check the return value.
4402 * @param pReNative The native recompile state.
4403 * @param poff Pointer to the variable with the code buffer position.
4404 * This will be update if we need to move a variable from
4405 * register to stack in order to satisfy the request.
4406 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4407 * registers (@c true, default) or the other way around
4408 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4409 */
4410DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4411{
4412 /*
4413 * Try find a completely unused register, preferably a call-volatile one.
4414 */
4415 uint8_t idxReg;
4416 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4417 & ~pReNative->Core.bmHstRegsWithGstShadow
4418 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4419 if (fRegs)
4420 {
4421 if (fPreferVolatile)
4422 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4423 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4424 else
4425 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4426 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4427 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4428 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4429 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4430 }
4431 else
4432 {
4433 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4434 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4435 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4436 }
4437 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4438}
4439
4440
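/*
 * Minimal usage sketch (not part of the build): a scratch register is
 * allocated, used by the emitted code and handed back to the allocator; the
 * iemNativeRegFreeTmp name is an assumption based on the allocator naming:
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0xdeadbeef));
 *      ... use idxTmpReg in further emitted code ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */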
4441/**
4442 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
4443 * registers.
4444 *
4445 * @returns The host register number; throws VBox status code on failure,
4446 * so no need to check the return value.
4447 * @param pReNative The native recompile state.
4448 * @param poff Pointer to the variable with the code buffer position.
4449 * This will be update if we need to move a variable from
4450 * register to stack in order to satisfy the request.
4451 * @param fRegMask Mask of acceptable registers.
4452 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4453 * registers (@c true, default) or the other way around
4454 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4455 */
4456DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4457 bool fPreferVolatile /*= true*/)
4458{
4459 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4460 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4461
4462 /*
4463 * Try to find a completely unused register, preferably a call-volatile one.
4464 */
4465 uint8_t idxReg;
4466 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4467 & ~pReNative->Core.bmHstRegsWithGstShadow
4468 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4469 & fRegMask;
4470 if (fRegs)
4471 {
4472 if (fPreferVolatile)
4473 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4474 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4475 else
4476 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4477 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4478 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4479 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4480 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4481 }
4482 else
4483 {
4484 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4485 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4486 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4487 }
4488 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4489}
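

/*
 * Illustrative sketch (hypothetical caller): using the mask variant to request
 * a non-volatile temporary that survives helper calls. The mask composition
 * matches the fRegMask expression used by iemNativeRegAllocTmpForGuestReg below.
 */
#if 0
static uint32_t iemNativeSketchNonVolatileTmp(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint32_t const fNonVolatileMask = IEMNATIVE_HST_GREG_MASK
                                    & ~IEMNATIVE_REG_FIXED_MASK
                                    & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
    uint8_t const  idxReg = iemNativeRegAllocTmpEx(pReNative, &off, fNonVolatileMask, false /*fPreferVolatile*/);
    /* ... emit code that uses idxReg across a helper call ... */
    iemNativeRegFreeTmp(pReNative, idxReg);
    return off;
}
#endif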
4490
4491
4492/**
4493 * Allocates a temporary register for loading an immediate value into.
4494 *
4495 * This will emit code to load the immediate, unless there happens to be an
4496 * unused register with the value already loaded.
4497 *
4498 * The caller will not modify the returned register, it must be considered
4499 * read-only. Free using iemNativeRegFreeTmpImm.
4500 *
4501 * @returns The host register number; throws VBox status code on failure, so no
4502 * need to check the return value.
4503 * @param pReNative The native recompile state.
4504 * @param poff Pointer to the variable with the code buffer position.
4505 * @param uImm The immediate value that the register must hold upon
4506 * return.
4507 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4508 * registers (@c true, default) or the other way around
4509 * (@c false).
4510 *
4511 * @note Reusing immediate values has not been implemented yet.
4512 */
4513DECL_HIDDEN_THROW(uint8_t)
4514iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4515{
4516 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4517 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4518 return idxReg;
4519}
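

/*
 * Illustrative sketch (hypothetical caller, placeholder constant): an immediate
 * temporary must be treated as read-only and released via iemNativeRegFreeTmpImm.
 */
#if 0
static uint32_t iemNativeSketchTmpImmUse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... emit code that only reads idxImmReg ... */
    iemNativeRegFreeTmpImm(pReNative, idxImmReg);   /* assumes the value was left unmodified */
    return off;
}
#endif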
4520
4521
4522/**
4523 * Allocates a temporary host general purpose register for keeping a guest
4524 * register value.
4525 *
4526 * Since we may already have a register holding the guest register value,
4527 * code will be emitted to do the loading if that's not the case. Code may also
4528 * be emitted if we have to free up a register to satisfy the request.
4529 *
4530 * @returns The host register number; throws VBox status code on failure, so no
4531 * need to check the return value.
4532 * @param pReNative The native recompile state.
4533 * @param poff Pointer to the variable with the code buffer
4534 * position. This will be updated if we need to move a
4535 * variable from register to stack in order to satisfy
4536 * the request.
4537 * @param enmGstReg The guest register that is to be updated.
4538 * @param enmIntendedUse How the caller will be using the host register.
4539 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4540 * register is okay (default). The ASSUMPTION here is
4541 * that the caller has already flushed all volatile
4542 * registers, so this is only applied if we allocate a
4543 * new register.
4544 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4545 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4546 */
4547DECL_HIDDEN_THROW(uint8_t)
4548iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4549 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4550 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4551{
4552 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4553#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4554 AssertMsg( fSkipLivenessAssert
4555 || pReNative->idxCurCall == 0
4556 || enmGstReg == kIemNativeGstReg_Pc
4557 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4558 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4559 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4560 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4561 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4562 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4563#endif
4564 RT_NOREF(fSkipLivenessAssert);
4565#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4566 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4567#endif
4568 uint32_t const fRegMask = !fNoVolatileRegs
4569 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4570 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4571
4572 /*
4573 * First check if the guest register value is already in a host register.
4574 */
4575 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4576 {
4577 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4578 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4579 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4580 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4581
4582 /* It's not supposed to be allocated... */
4583 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4584 {
4585 /*
4586 * If the register will trash the guest shadow copy, try to find a
4587 * completely unused register we can use instead. If that fails,
4588 * we need to disassociate the host reg from the guest reg.
4589 */
4590 /** @todo would be nice to know if preserving the register is in any way helpful. */
4591 /* If the purpose is calculations, try to duplicate the register value as
4592 we'll be clobbering the shadow. */
4593 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4594 && ( ~pReNative->Core.bmHstRegs
4595 & ~pReNative->Core.bmHstRegsWithGstShadow
4596 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4597 {
4598 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4599
4600 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4601
4602 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4603 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4604 g_apszIemNativeHstRegNames[idxRegNew]));
4605 idxReg = idxRegNew;
4606 }
4607 /* If the current register matches the restrictions, go ahead and allocate
4608 it for the caller. */
4609 else if (fRegMask & RT_BIT_32(idxReg))
4610 {
4611 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4612 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4613 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4614 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4615 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4616 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4617 else
4618 {
4619 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4620 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4621 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4622 }
4623 }
4624 /* Otherwise, allocate a register that satisfies the caller and transfer
4625 the shadowing if compatible with the intended use. (This basically
4626 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4627 else
4628 {
4629 Assert(fNoVolatileRegs);
4630 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4631 !fNoVolatileRegs
4632 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4633 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4634 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4635 {
4636 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4637 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4638 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4639 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4640 }
4641 else
4642 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4643 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4644 g_apszIemNativeHstRegNames[idxRegNew]));
4645 idxReg = idxRegNew;
4646 }
4647 }
4648 else
4649 {
4650 /*
4651 * Oops. Shadowed guest register already allocated!
4652 *
4653 * Allocate a new register, copy the value and, if updating, the
4654 * guest shadow copy assignment to the new register.
4655 */
4656 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4657 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4658 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4659 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4660
4661 /** @todo share register for readonly access. */
4662 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4663 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4664
4665 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4666 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4667
4668 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4669 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4670 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4671 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4672 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4673 else
4674 {
4675 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4676 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4677 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4678 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4679 }
4680 idxReg = idxRegNew;
4681 }
4682 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4683
4684#ifdef VBOX_STRICT
4685 /* Strict builds: Check that the value is correct. */
4686 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4687#endif
4688
4689#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4690 /** @todo r=aeichner Implement for registers other than GPR as well. */
4691 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4692 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
4693 && enmGstReg >= kIemNativeGstReg_GprFirst
4694 && enmGstReg <= kIemNativeGstReg_GprLast
4695 )
4696 {
4697# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4698 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
4699 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
4700# endif
4701 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
4702 }
4703#endif
4704
4705 return idxReg;
4706 }
4707
4708 /*
4709 * Allocate a new register, load it with the guest value and designate it as a shadow copy of the guest register.
4710 */
4711 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4712
4713 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4714 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4715
4716 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4717 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4718 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4719 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4720
4721#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4722 /** @todo r=aeichner Implement for registers other than GPR as well. */
4723 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4724 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
4725 && enmGstReg >= kIemNativeGstReg_GprFirst
4726 && enmGstReg <= kIemNativeGstReg_GprLast
4727 )
4728 {
4729# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4730 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
4731 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
4732# endif
4733 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
4734 }
4735#endif
4736
4737 return idxRegNew;
4738}
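

/*
 * Illustrative sketch (hypothetical caller): fetching the guest PC shadow for a
 * read-modify-write update. The allocator emits a load only when the value is
 * not already shadowed and keeps the shadow association for the caller.
 */
#if 0
static uint32_t iemNativeSketchUpdateGuestPc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);
    /* ... emit code modifying idxPcReg ... */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif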
4739
4740
4741/**
4742 * Allocates a temporary host general purpose register that already holds the
4743 * given guest register value.
4744 *
4745 * The use case for this function is places where the shadowing state cannot be
4746 * modified due to branching and such. This will fail if we don't have a
4747 * current shadow copy handy or if it's incompatible. The only code that will
4748 * be emitted here is value checking code in strict builds.
4749 *
4750 * The intended use can only be readonly!
4751 *
4752 * @returns The host register number, UINT8_MAX if not present.
4753 * @param pReNative The native recompile state.
4754 * @param poff Pointer to the instruction buffer offset.
4755 * Will be updated in strict builds if a register is
4756 * found.
4757 * @param enmGstReg The guest register that is to be updated.
4758 * @note In strict builds, this may throw instruction buffer growth failures.
4759 * Non-strict builds will not throw anything.
4760 * @sa iemNativeRegAllocTmpForGuestReg
4761 */
4762DECL_HIDDEN_THROW(uint8_t)
4763iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4764{
4765 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4766#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4767 AssertMsg( pReNative->idxCurCall == 0
4768 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4769 || enmGstReg == kIemNativeGstReg_Pc,
4770 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4771#endif
4772
4773 /*
4774 * First check if the guest register value is already in a host register.
4775 */
4776 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4777 {
4778 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4779 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4780 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4781 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4782
4783 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4784 {
4785 /*
4786 * We only do readonly use here, so easy compared to the other
4787 * variant of this code.
4788 */
4789 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4790 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4791 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4792 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4793 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4794
4795#ifdef VBOX_STRICT
4796 /* Strict builds: Check that the value is correct. */
4797 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4798#else
4799 RT_NOREF(poff);
4800#endif
4801 return idxReg;
4802 }
4803 }
4804
4805 return UINT8_MAX;
4806}
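

/*
 * Illustrative sketch (hypothetical caller): the "if already present" variant
 * returns UINT8_MAX when no usable shadow copy exists, so the caller needs a
 * fallback path (or simply skips the optimization).
 */
#if 0
static uint32_t iemNativeSketchReadPcIfShadowed(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... emit read-only uses of idxPcReg ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    /* else: fall back to loading the value from the guest context. */
    return off;
}
#endif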
4807
4808
4809/**
4810 * Allocates argument registers for a function call.
4811 *
4812 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4813 * need to check the return value.
4814 * @param pReNative The native recompile state.
4815 * @param off The current code buffer offset.
4816 * @param cArgs The number of arguments the function call takes.
4817 */
4818DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4819{
4820 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4821 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4822 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4823 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4824
4825 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4826 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4827 else if (cArgs == 0)
4828 return off;
4829
4830 /*
4831 * Do we get lucky and all registers are free and not shadowing anything?
4832 */
4833 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4834 for (uint32_t i = 0; i < cArgs; i++)
4835 {
4836 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4837 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4838 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4839 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4840 }
4841 /*
4842 * Okay, not lucky so we have to free up the registers.
4843 */
4844 else
4845 for (uint32_t i = 0; i < cArgs; i++)
4846 {
4847 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4848 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4849 {
4850 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4851 {
4852 case kIemNativeWhat_Var:
4853 {
4854 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4855 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4856 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4857 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4858 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4859#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4860 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4861#endif
4862
4863 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4864 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4865 else
4866 {
4867 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4868 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4869 }
4870 break;
4871 }
4872
4873 case kIemNativeWhat_Tmp:
4874 case kIemNativeWhat_Arg:
4875 case kIemNativeWhat_rc:
4876 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4877 default:
4878 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4879 }
4880
4881 }
4882 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4883 {
4884 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4885 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4886 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4887#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4888 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4889#endif
4890 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4891 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4892 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4893 }
4894 else
4895 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4896 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4897 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4898 }
4899 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4900 return off;
4901}
4902
4903
4904DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4905
4906
4907#if 0
4908/**
4909 * Frees a register assignment of any type.
4910 *
4911 * @param pReNative The native recompile state.
4912 * @param idxHstReg The register to free.
4913 *
4914 * @note Does not update variables.
4915 */
4916DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4917{
4918 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4919 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4920 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4921 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4922 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4923 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4924 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4925 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4926 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4927 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4928 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4929 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4930 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4931 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4932
4933 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4934 /* no flushing, right:
4935 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4936 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4937 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4938 */
4939}
4940#endif
4941
4942
4943/**
4944 * Frees a temporary register.
4945 *
4946 * Any shadow copies of guest registers assigned to the host register will not
4947 * be flushed by this operation.
4948 */
4949DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4950{
4951 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4952 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4953 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4954 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4955 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4956}
4957
4958
4959/**
4960 * Frees a temporary immediate register.
4961 *
4962 * It is assumed that the caller has not modified the register, so it still holds
4963 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4964 */
4965DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4966{
4967 iemNativeRegFreeTmp(pReNative, idxHstReg);
4968}
4969
4970
4971/**
4972 * Frees a register assigned to a variable.
4973 *
4974 * The register will be disassociated from the variable.
4975 */
4976DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4977{
4978 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4979 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4980 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4981 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4982 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4983#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4984 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4985#endif
4986
4987 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4988 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4989 if (!fFlushShadows)
4990 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4991 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4992 else
4993 {
4994 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4995 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4996#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4997 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4998#endif
4999 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5000 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
5001 uint64_t fGstRegShadows = fGstRegShadowsOld;
5002 while (fGstRegShadows)
5003 {
5004 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5005 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5006
5007 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
5008 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
5009 }
5010 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
5011 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
5012 }
5013}
5014
5015
5016#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5017# ifdef LOG_ENABLED
5018/** Host CPU SIMD register names. */
5019DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5020{
5021# ifdef RT_ARCH_AMD64
5022 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5023 # elif defined(RT_ARCH_ARM64)
5024 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5025 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5026# else
5027# error "port me"
5028# endif
5029};
5030# endif
5031
5032
5033/**
5034 * Frees a SIMD register assigned to a variable.
5035 *
5036 * The register will be disassociated from the variable.
5037 */
5038DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
5039{
5040 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
5041 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
5042 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
5043 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5044 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
5045 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5046
5047 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
5048 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
5049 if (!fFlushShadows)
5050 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
5051 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
5052 else
5053 {
5054 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5055 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
5056 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5057 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
5058 uint64_t fGstRegShadows = fGstRegShadowsOld;
5059 while (fGstRegShadows)
5060 {
5061 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5062 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5063
5064 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
5065 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
5066 }
5067 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
5068 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
5069 }
5070}
5071
5072
5073/**
5074 * Reassigns a variable to a different SIMD register specified by the caller.
5075 *
5076 * @returns The new code buffer position.
5077 * @param pReNative The native recompile state.
5078 * @param off The current code buffer position.
5079 * @param idxVar The variable index.
5080 * @param idxRegOld The old host register number.
5081 * @param idxRegNew The new host register number.
5082 * @param pszCaller The caller for logging.
5083 */
5084static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
5085 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
5086{
5087 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5088 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
5089 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
5090 RT_NOREF(pszCaller);
5091
5092 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5093 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
5094 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
5095
5096 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
5097 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5098 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5099
5100 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
5101 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
5102
5103
5104 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
5105 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
5106 else
5107 {
5108 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
5109 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
5110 }
5111
5112 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
5113 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
5114 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
5115 if (fGstRegShadows)
5116 {
5117 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
5118 | RT_BIT_32(idxRegNew);
5119 while (fGstRegShadows)
5120 {
5121 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5122 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
5123
5124 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
5125 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
5126 }
5127 }
5128
5129 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
5130 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
5131 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
5132 return off;
5133}
5134
5135
5136/**
5137 * Moves a variable to a different register or spills it onto the stack.
5138 *
5139 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
5140 * kinds can easily be recreated if needed later.
5141 *
5142 * @returns The new code buffer position.
5143 * @param pReNative The native recompile state.
5144 * @param off The current code buffer position.
5145 * @param idxVar The variable index.
5146 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
5147 * call-volatile registers.
5148 */
5149DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
5150 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
5151{
5152 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5153 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5154 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
5155 Assert(!pVar->fRegAcquired);
5156 Assert(pVar->fSimdReg);
5157
5158 uint8_t const idxRegOld = pVar->idxReg;
5159 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5160 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
5161 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
5162 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
5163 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
5164 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5165 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
5166 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5167 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5168 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
5169
5170 /** @todo Add statistics on this.*/
5171 /** @todo Implement basic variable liveness analysis (python) so variables
5172 * can be freed immediately once no longer used. This has the potential to
5173 * be trashing registers and stack for dead variables.
5174 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
5175
5176 /*
5177 * First try move it to a different register, as that's cheaper.
5178 */
5179 fForbiddenRegs |= RT_BIT_32(idxRegOld);
5180 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
5181 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
5182 if (fRegs)
5183 {
5184 /* Avoid using shadow registers, if possible. */
5185 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
5186 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
5187 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
5188 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
5189 }
5190
5191 /*
5192 * Otherwise we must spill the register onto the stack.
5193 */
5194 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
5195 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
5196 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
5197
5198 if (pVar->cbVar == sizeof(RTUINT128U))
5199 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
5200 else
5201 {
5202 Assert(pVar->cbVar == sizeof(RTUINT256U));
5203 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
5204 }
5205
5206 pVar->idxReg = UINT8_MAX;
5207 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
5208 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
5209 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
5210 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
5211 return off;
5212}
5213
5214
5215/**
5216 * Called right before emitting a call instruction to move anything important
5217 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
5218 * optionally freeing argument variables.
5219 *
5220 * @returns New code buffer offset, UINT32_MAX on failure.
5221 * @param pReNative The native recompile state.
5222 * @param off The code buffer offset.
5223 * @param cArgs The number of arguments the function call takes.
5224 * It is presumed that the host register part of these have
5225 * been allocated as such already and won't need moving,
5226 * just freeing.
5227 * @param fKeepVars Mask of variables that should keep their register
5228 * assignments. Caller must take care to handle these.
5229 */
5230DECL_HIDDEN_THROW(uint32_t)
5231iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
5232{
5233 Assert(!cArgs); RT_NOREF(cArgs);
5234
5235 /* fKeepVars will reduce this mask. */
5236 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5237
5238 /*
5239 * Move anything important out of volatile registers.
5240 */
5241 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5242#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
5243 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
5244#endif
5245 ;
5246
5247 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
5248 if (!fSimdRegsToMove)
5249 { /* likely */ }
5250 else
5251 {
5252 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
5253 while (fSimdRegsToMove != 0)
5254 {
5255 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
5256 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
5257
5258 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
5259 {
5260 case kIemNativeWhat_Var:
5261 {
5262 uint8_t const idxVar = pReNative->Core.aHstRegs[idxSimdReg].idxVar;
5263 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5264 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5265 Assert(pVar->idxReg == idxSimdReg);
5266 Assert(pVar->fSimdReg);
5267 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
5268 {
5269 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
5270 idxVar, pVar->enmKind, pVar->idxReg));
5271 if (pVar->enmKind != kIemNativeVarKind_Stack)
5272 pVar->idxReg = UINT8_MAX;
5273 else
5274 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
5275 }
5276 else
5277 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
5278 continue;
5279 }
5280
5281 case kIemNativeWhat_Arg:
5282 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
5283 continue;
5284
5285 case kIemNativeWhat_rc:
5286 case kIemNativeWhat_Tmp:
5287 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
5288 continue;
5289
5290 case kIemNativeWhat_FixedReserved:
5291#ifdef RT_ARCH_ARM64
5292 continue; /* On ARM the upper half of the virtual 256-bit register. */
5293#endif
5294
5295 case kIemNativeWhat_FixedTmp:
5296 case kIemNativeWhat_pVCpuFixed:
5297 case kIemNativeWhat_pCtxFixed:
5298 case kIemNativeWhat_PcShadow:
5299 case kIemNativeWhat_Invalid:
5300 case kIemNativeWhat_End:
5301 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5302 }
5303 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5304 }
5305 }
5306
5307 /*
5308 * Do the actual freeing.
5309 */
5310 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
5311 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
5312 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
5313 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
5314
5315 /* If there are guest register shadows in any call-volatile register, we
5316 have to clear the corresponding guest register masks for each register. */
5317 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
5318 if (fHstSimdRegsWithGstShadow)
5319 {
5320 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5321 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
5322 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
5323 do
5324 {
5325 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
5326 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
5327
5328 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
5329
5330#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5331 /*
5332 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
5333 * to call volatile registers).
5334 */
5335 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5336 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
5337 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
5338#endif
5339 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5340 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
5341
5342 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
5343 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5344 } while (fHstSimdRegsWithGstShadow != 0);
5345 }
5346
5347 return off;
5348}
5349#endif
5350
5351
5352/**
5353 * Called right before emitting a call instruction to move anything important
5354 * out of call-volatile registers, free and flush the call-volatile registers,
5355 * optionally freeing argument variables.
5356 *
5357 * @returns New code buffer offset, UINT32_MAX on failure.
5358 * @param pReNative The native recompile state.
5359 * @param off The code buffer offset.
5360 * @param cArgs The number of arguments the function call takes.
5361 * It is presumed that the host register part of these have
5362 * been allocated as such already and won't need moving,
5363 * just freeing.
5364 * @param fKeepVars Mask of variables that should keep their register
5365 * assignments. Caller must take care to handle these.
5366 */
5367DECL_HIDDEN_THROW(uint32_t)
5368iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
5369{
5370 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
5371
5372 /* fKeepVars will reduce this mask. */
5373 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5374
5375 /*
5376 * Move anything important out of volatile registers.
5377 */
5378 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
5379 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
5380 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
5381#ifdef IEMNATIVE_REG_FIXED_TMP0
5382 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
5383#endif
5384#ifdef IEMNATIVE_REG_FIXED_TMP1
5385 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
5386#endif
5387#ifdef IEMNATIVE_REG_FIXED_PC_DBG
5388 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
5389#endif
5390 & ~g_afIemNativeCallRegs[cArgs];
5391
5392 fRegsToMove &= pReNative->Core.bmHstRegs;
5393 if (!fRegsToMove)
5394 { /* likely */ }
5395 else
5396 {
5397 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
5398 while (fRegsToMove != 0)
5399 {
5400 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
5401 fRegsToMove &= ~RT_BIT_32(idxReg);
5402
5403 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
5404 {
5405 case kIemNativeWhat_Var:
5406 {
5407 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
5408 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5409 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
5410 Assert(pVar->idxReg == idxReg);
5411#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5412 Assert(!pVar->fSimdReg);
5413#endif
5414 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
5415 {
5416 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
5417 idxVar, pVar->enmKind, pVar->idxReg));
5418 if (pVar->enmKind != kIemNativeVarKind_Stack)
5419 pVar->idxReg = UINT8_MAX;
5420 else
5421 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
5422 }
5423 else
5424 fRegsToFree &= ~RT_BIT_32(idxReg);
5425 continue;
5426 }
5427
5428 case kIemNativeWhat_Arg:
5429 AssertMsgFailed(("What?!?: %u\n", idxReg));
5430 continue;
5431
5432 case kIemNativeWhat_rc:
5433 case kIemNativeWhat_Tmp:
5434 AssertMsgFailed(("Missing free: %u\n", idxReg));
5435 continue;
5436
5437 case kIemNativeWhat_FixedTmp:
5438 case kIemNativeWhat_pVCpuFixed:
5439 case kIemNativeWhat_pCtxFixed:
5440 case kIemNativeWhat_PcShadow:
5441 case kIemNativeWhat_FixedReserved:
5442 case kIemNativeWhat_Invalid:
5443 case kIemNativeWhat_End:
5444 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
5445 }
5446 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
5447 }
5448 }
5449
5450 /*
5451 * Do the actual freeing.
5452 */
5453 if (pReNative->Core.bmHstRegs & fRegsToFree)
5454 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
5455 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
5456 pReNative->Core.bmHstRegs &= ~fRegsToFree;
5457
5458 /* If there are guest register shadows in any call-volatile register, we
5459 have to clear the corresponding guest register masks for each register. */
5460 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
5461 if (fHstRegsWithGstShadow)
5462 {
5463 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
5464 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
5465 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
5466 do
5467 {
5468 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
5469 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5470
5471 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
5472
5473#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5474 /*
5475 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
5476 * to call volatile registers).
5477 */
5478 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
5479 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
5480 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
5481#endif
5482
5483 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
5484 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
5485 } while (fHstRegsWithGstShadow != 0);
5486 }
5487
5488#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5489 /* Now for the SIMD registers, no argument support for now. */
5490 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
5491#endif
5492
5493 return off;
5494}
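

/*
 * Illustrative sketch (hypothetical call site): the typical sequence around a
 * helper or threaded-function call. Volatile registers are freed and flushed
 * first, the call itself is emitted (elided here), and afterwards any guest
 * shadows the callee may have invalidated are dropped.
 */
#if 0
static uint32_t iemNativeSketchEmitHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
{
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs);
    /* ... emit the actual call instruction(s) here ... */
    iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /* assume the callee may change any guest register */
    return off;
}
#endif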
5495
5496
5497/**
5498 * Flushes a set of guest register shadow copies.
5499 *
5500 * This is usually done after calling a threaded function or a C-implementation
5501 * of an instruction.
5502 *
5503 * @param pReNative The native recompile state.
5504 * @param fGstRegs Set of guest registers to flush.
5505 */
5506DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
5507{
5508 /*
5509 * Reduce the mask by what's currently shadowed
5510 */
5511 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
5512 fGstRegs &= bmGstRegShadowsOld;
5513 if (fGstRegs)
5514 {
5515 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
5516 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
5517 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
5518 if (bmGstRegShadowsNew)
5519 {
5520 /*
5521 * Partial.
5522 */
5523 do
5524 {
5525 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5526 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5527 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5528 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5529 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5530#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5531 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
5532#endif
5533
5534 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5535 fGstRegs &= ~fInThisHstReg;
5536 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5537 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5538 if (!fGstRegShadowsNew)
5539 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5540 } while (fGstRegs != 0);
5541 }
5542 else
5543 {
5544 /*
5545 * Clear all.
5546 */
5547 do
5548 {
5549 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5550 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5551 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5552 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5553 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5554#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5555 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
5556#endif
5557
5558 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5559 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5560 } while (fGstRegs != 0);
5561 pReNative->Core.bmHstRegsWithGstShadow = 0;
5562 }
5563 }
5564}
5565
5566
5567/**
5568 * Flushes guest register shadow copies held by a set of host registers.
5569 *
5570 * This is used with the TLB lookup code for ensuring that we don't carry on
5571 * with any guest shadows in volatile registers, as these will get corrupted by
5572 * a TLB miss.
5573 *
5574 * @param pReNative The native recompile state.
5575 * @param fHstRegs Set of host registers to flush guest shadows for.
5576 */
5577DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5578{
5579 /*
5580 * Reduce the mask by what's currently shadowed.
5581 */
5582 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5583 fHstRegs &= bmHstRegsWithGstShadowOld;
5584 if (fHstRegs)
5585 {
5586 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5587 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5588 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5589 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5590 if (bmHstRegsWithGstShadowNew)
5591 {
5592 /*
5593 * Partial (likely).
5594 */
5595 uint64_t fGstShadows = 0;
5596 do
5597 {
5598 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5599 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5600 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5601 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5602#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5603 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5604#endif
5605
5606 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5607 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5608 fHstRegs &= ~RT_BIT_32(idxHstReg);
5609 } while (fHstRegs != 0);
5610 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5611 }
5612 else
5613 {
5614 /*
5615 * Clear all.
5616 */
5617 do
5618 {
5619 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5620 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5621 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5622 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5623#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5624 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5625#endif
5626
5627 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5628 fHstRegs &= ~RT_BIT_32(idxHstReg);
5629 } while (fHstRegs != 0);
5630 pReNative->Core.bmGstRegShadows = 0;
5631 }
5632 }
5633}
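

/*
 * Illustrative sketch (hypothetical TLB lookup preparation): guest shadows held
 * in call-volatile registers are forgotten up front, so the recompiler cannot
 * rely on copies that a TLB miss path would clobber.
 */
#if 0
static void iemNativeSketchPrepareTlbLookup(PIEMRECOMPILERSTATE pReNative)
{
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
    /* ... the TLB lookup code with its possible miss path is emitted after this ... */
}
#endif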
5634
5635
5636/**
5637 * Restores guest shadow copies in volatile registers.
5638 *
5639 * This is used after calling a helper function (think TLB miss) to restore the
5640 * register state of volatile registers.
5641 *
5642 * @param pReNative The native recompile state.
5643 * @param off The code buffer offset.
5644 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5645 * be active (allocated) w/o asserting. Hack.
5646 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5647 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5648 */
5649DECL_HIDDEN_THROW(uint32_t)
5650iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5651{
5652 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5653 if (fHstRegs)
5654 {
5655 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5656 do
5657 {
5658 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5659
5660 /* It's not fatal if a register is active holding a variable that
5661 shadows a guest register, ASSUMING all pending guest register
5662 writes were flushed prior to the helper call. However, we'll be
5663 emitting duplicate restores, so it wastes code space. */
5664 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5665 RT_NOREF(fHstRegsActiveShadows);
5666
5667 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5668#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5669 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
5670#endif
5671 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5672 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5673 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5674
5675 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5676 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5677
5678 fHstRegs &= ~RT_BIT_32(idxHstReg);
5679 } while (fHstRegs != 0);
5680 }
5681 return off;
5682}
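

/*
 * Illustrative sketch (hypothetical TLB-miss slow path): after the helper call
 * the call-volatile host registers no longer hold the guest values the shadow
 * bookkeeping says they do, so they are reloaded here. Saving and restoring of
 * variable registers around the call is omitted.
 */
#if 0
static uint32_t iemNativeSketchAfterTlbMissHelper(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* ... the helper call was emitted just before this point ... */
    off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
    return off;
}
#endif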
5683
5684
5685
5686
5687/*********************************************************************************************************************************
5688* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5689*********************************************************************************************************************************/
5690#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5691
5692/**
5693 * Info about shadowed guest SIMD register values.
5694 * @see IEMNATIVEGSTSIMDREG
5695 */
5696static struct
5697{
5698 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5699 uint32_t offXmm;
5700 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5701 uint32_t offYmm;
5702 /** Name (for logging). */
5703 const char *pszName;
5704} const g_aGstSimdShadowInfo[] =
5705{
5706#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5707 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5708 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5709 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5710 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5711 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5712 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5713 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5714 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5715 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5716 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5717 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5718 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5719 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5720 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5721 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5722 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5723 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5724#undef CPUMCTX_OFF_AND_SIZE
5725};
5726AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5727
5728
5729/**
5730 * Frees a temporary SIMD register.
5731 *
5732 * Any shadow copies of guest registers assigned to the host register will not
5733 * be flushed by this operation.
5734 */
5735DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5736{
5737 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5738 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5739 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5740 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5741 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5742}
5743
5744
5745/**
5746 * Emits code to flush a pending write of the given SIMD register if any, and also flushes the guest to host SIMD register association.
5747 *
5748 * @returns New code buffer offset.
5749 * @param pReNative The native recompile state.
5750 * @param off Current code buffer position.
5751 * @param enmGstSimdReg The guest SIMD register to flush.
5752 */
5753DECL_HIDDEN_THROW(uint32_t)
5754iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5755{
5756 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5757
5758 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5759 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5760 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5761 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5762
5763 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5764 {
5765 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5766 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5767 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5768 }
5769
5770 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5771 {
5772 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5773 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5774 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5775 }
5776
5777 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5778 return off;
5779}
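/* Usage sketch (assuming pReNative and off as in the surrounding code): before emitting
   code that reads a guest SIMD register directly from CPUMCTX, any pending shadow write
   for it would typically be flushed first, roughly like this:
   @code
        off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(0)); // ymm0, assuming index 0
   @endcode */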
5780
5781
5782/**
5783 * Flush the given set of guest SIMD registers if marked as dirty.
5784 *
5785 * @returns New code buffer offset.
5786 * @param pReNative The native recompile state.
5787 * @param off Current code buffer position.
5788 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
5789 */
5790DECL_HIDDEN_THROW(uint32_t)
5791iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5792{
5793 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5794 & fFlushGstSimdReg;
5795 if (bmGstSimdRegShadowDirty)
5796 {
5797# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5798 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5799 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5800# endif
5801
5802 do
5803 {
5804 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5805 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5806 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5807 } while (bmGstSimdRegShadowDirty);
5808 }
5809
5810 return off;
5811}
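/* Usage sketch (assuming guest SIMD register bits correspond to the IEMNATIVEGSTSIMDREG
   values, as the loop above does): a caller can restrict the flush to a subset by passing
   a mask instead of the flush-everything default, e.g.:
   @code
        off = iemNativeSimdRegFlushDirtyGuest(pReNative, off,
                                                RT_BIT_64(kIemNativeGstSimdReg_SimdRegFirst + 0)
                                              | RT_BIT_64(kIemNativeGstSimdReg_SimdRegFirst + 1));
   @endcode */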
5812
5813
5814#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5815/**
5816 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5817 *
5818 * @returns New code buffer offset.
5819 * @param pReNative The native recompile state.
5820 * @param off Current code buffer position.
5821 * @param idxHstSimdReg The host SIMD register.
5822 *
5823 * @note This doesn't do any unshadowing of guest registers from the host register.
5824 */
5825DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5826{
5827 /* We need to flush any pending guest register writes this host register shadows. */
5828 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5829 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5830 if (bmGstSimdRegShadowDirty)
5831 {
5832# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5833 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5834 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5835# endif
5836
5837 do
5838 {
5839 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5840 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5841 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5842 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5843 } while (bmGstSimdRegShadowDirty);
5844 }
5845
5846 return off;
5847}
5848#endif
5849
5850
5851/**
5852 * Locate a register, possibly freeing one up.
5853 *
5854 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5855 * failed.
5856 *
5857 * @returns Host register number on success. Returns UINT8_MAX if no registers
5858 *          found; the caller is supposed to deal with this and raise an
5859 * allocation type specific status code (if desired).
5860 *
5861 * @throws  VBox status code if we run into trouble spilling a variable or
5862 * recording debug info. Does NOT throw anything if we're out of
5863 * registers, though.
5864 */
5865static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5866 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5867{
5868 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5869 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5870 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5871
5872 /*
5873 * Try a freed register that's shadowing a guest register.
5874 */
5875 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5876 if (fRegs)
5877 {
5878 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5879
5880#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5881 /*
5882         * When we have liveness information, we use it to kick out all shadowed
5883         * guest registers that will not be needed any more in this TB. If we're
5884 * lucky, this may prevent us from ending up here again.
5885 *
5886 * Note! We must consider the previous entry here so we don't free
5887 * anything that the current threaded function requires (current
5888 * entry is produced by the next threaded function).
5889 */
5890 uint32_t const idxCurCall = pReNative->idxCurCall;
5891 if (idxCurCall > 0)
5892 {
5893 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5894
5895# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5896 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5897 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5898 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5899#else
5900 /* Construct a mask of the registers not in the read or write state.
5901               Note! We could skip writes, if they aren't from us, as this is just
5902 a hack to prevent trashing registers that have just been written
5903 or will be written when we retire the current instruction. */
5904 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5905 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5906 & IEMLIVENESSBIT_MASK;
5907#endif
5908 /* If it matches any shadowed registers. */
5909 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5910 {
5911 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5912 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5913 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5914
5915 /* See if we've got any unshadowed registers we can return now. */
5916 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5917 if (fUnshadowedRegs)
5918 {
5919 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5920 return (fPreferVolatile
5921 ? ASMBitFirstSetU32(fUnshadowedRegs)
5922 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5923 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5924 - 1;
5925 }
5926 }
5927 }
5928#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5929
5930 unsigned const idxReg = (fPreferVolatile
5931 ? ASMBitFirstSetU32(fRegs)
5932 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5933 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5934 - 1;
5935
5936 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5937 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5938 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5939 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5940
5941 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5942 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
5943
5944 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5945 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5946 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5947 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5948 return idxReg;
5949 }
5950
5951 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5952
5953 /*
5954 * Try free up a variable that's in a register.
5955 *
5956 * We do two rounds here, first evacuating variables we don't need to be
5957 * saved on the stack, then in the second round move things to the stack.
5958 */
5959 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5960 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5961 {
5962 uint32_t fVars = pReNative->Core.bmVars;
5963 while (fVars)
5964 {
5965 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5966 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5967            if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here, but make sure the loop advances. */
5968            { fVars &= ~RT_BIT_32(idxVar); continue; }
5969
5970 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5971 && (RT_BIT_32(idxReg) & fRegMask)
5972 && ( iLoop == 0
5973 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5974 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5975 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5976 {
5977 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5978 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5979 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5980 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5981 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5982 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5983
5984 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5985 {
5986 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5987 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5988 }
5989
5990 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5991 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5992
5993 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5994                pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5995 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5996 return idxReg;
5997 }
5998 fVars &= ~RT_BIT_32(idxVar);
5999 }
6000 }
6001
6002 AssertFailed();
6003 return UINT8_MAX;
6004}
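/* Call pattern sketch (mirroring iemNativeSimdRegAllocTmp below, with poff and
   fPreferVolatile assumed in scope): callers only reach this function after their own
   quick scan fails, and they turn UINT8_MAX into a longjmp:
   @code
        uint8_t const idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
        AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
   @endcode */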
6005
6006
6007/**
6008 * Flushes a set of guest SIMD register shadow copies.
6009 *
6010 * This is usually done after calling a threaded function or a C-implementation
6011 * of an instruction.
6012 *
6013 * @param pReNative The native recompile state.
6014 * @param fGstSimdRegs Set of guest SIMD registers to flush.
6015 */
6016DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
6017{
6018 /*
6019 * Reduce the mask by what's currently shadowed
6020 */
6021 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
6022 fGstSimdRegs &= bmGstSimdRegShadows;
6023 if (fGstSimdRegs)
6024 {
6025 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
6026 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
6027 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
6028 if (bmGstSimdRegShadowsNew)
6029 {
6030 /*
6031 * Partial.
6032 */
6033 do
6034 {
6035 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
6036 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
6037 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
6038 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
6039 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
6040 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
6041
6042 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
6043 fGstSimdRegs &= ~fInThisHstReg;
6044 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
6045 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
6046 if (!fGstRegShadowsNew)
6047 {
6048 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6049 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6050 }
6051 } while (fGstSimdRegs != 0);
6052 }
6053 else
6054 {
6055 /*
6056 * Clear all.
6057 */
6058 do
6059 {
6060 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
6061 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
6062 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
6063 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
6064 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
6065 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
6066
6067 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
6068 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
6069 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6070 } while (fGstSimdRegs != 0);
6071 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
6072 }
6073 }
6074}
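/* Usage sketch: after recompiling a call into a C-implementation that may have modified
   any guest SIMD register, the shadow copies are typically dropped wholesale, e.g.:
   @code
        iemNativeSimdRegFlushGuestShadows(pReNative, UINT64_MAX); // forget all guest SIMD shadows
   @endcode */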
6075
6076
6077/**
6078 * Allocates a temporary host SIMD register.
6079 *
6080 * This may emit code to save register content onto the stack in order to free
6081 * up a register.
6082 *
6083 * @returns The host register number; throws VBox status code on failure,
6084 * so no need to check the return value.
6085 * @param pReNative The native recompile state.
6086 * @param poff Pointer to the variable with the code buffer position.
6087 *                      This will be updated if we need to move a variable from
6088 * register to stack in order to satisfy the request.
6089 * @param fPreferVolatile Whether to prefer volatile over non-volatile
6090 * registers (@c true, default) or the other way around
6091 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
6092 */
6093DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
6094{
6095 /*
6096 * Try find a completely unused register, preferably a call-volatile one.
6097 */
6098 uint8_t idxSimdReg;
6099    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
6100                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6101 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
6102 if (fRegs)
6103 {
6104 if (fPreferVolatile)
6105 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6106 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6107 else
6108 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6109 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6110 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
6111 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
6112
6113 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6114 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6115 }
6116 else
6117 {
6118 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
6119 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
6120 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6121 }
6122
6123 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
6124 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
6125}
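/* Usage sketch (assuming pReNative and off as elsewhere in this file): a scratch SIMD
   register is allocated, used by some emitted code and then handed back:
   @code
        uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
        // ... emit code that uses idxSimdTmp ...
        iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
   @endcode */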
6126
6127
6128/**
6129 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
6130 * registers.
6131 *
6132 * @returns The host register number; throws VBox status code on failure,
6133 * so no need to check the return value.
6134 * @param pReNative The native recompile state.
6135 * @param poff Pointer to the variable with the code buffer position.
6136 *                      This will be updated if we need to move a variable from
6137 * register to stack in order to satisfy the request.
6138 * @param fRegMask Mask of acceptable registers.
6139 * @param fPreferVolatile Whether to prefer volatile over non-volatile
6140 * registers (@c true, default) or the other way around
6141 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
6142 */
6143DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
6144 bool fPreferVolatile /*= true*/)
6145{
6146 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
6147 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
6148
6149 /*
6150 * Try find a completely unused register, preferably a call-volatile one.
6151 */
6152 uint8_t idxSimdReg;
6153 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
6154 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6155 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
6156 & fRegMask;
6157 if (fRegs)
6158 {
6159 if (fPreferVolatile)
6160 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6161 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6162 else
6163 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
6164 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
6165 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
6166 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
6167
6168 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
6169 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6170 }
6171 else
6172 {
6173 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
6174 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
6175 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
6176 }
6177
6178 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
6179 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
6180}
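/* Usage sketch: restricting the allocation to non-fixed, non-volatile host SIMD registers,
   e.g. when the value has to survive a helper call (same mask expression as used by
   iemNativeSimdRegAllocTmpForGuestSimdReg further down):
   @code
        uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmpEx(pReNative, &off,
                                                                IEMNATIVE_HST_SIMD_REG_MASK
                                                              & ~IEMNATIVE_SIMD_REG_FIXED_MASK
                                                              & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
   @endcode */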
6181
6182
6183/**
6184 * Sets the indicator for which part of the given SIMD register has valid data loaded.
6185 *
6186 * @param pReNative The native recompile state.
6187 * @param idxHstSimdReg The host SIMD register to update the state for.
6188 * @param enmLoadSz The load size to set.
6189 */
6190DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
6191 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6192{
6193 /* Everything valid already? -> nothing to do. */
6194 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
6195 return;
6196
6197 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
6198 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
6199 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
6200 {
6201 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
6202 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6203 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
6204 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
6205 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
6206 }
6207}
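/* Behaviour sketch (idxHstSimdReg assumed in scope): loading the two halves separately
   widens the tracked state to the full 256 bits:
   @code
        iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_Low128);
        iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, kIemNativeGstSimdRegLdStSz_High128);
        // enmLoaded is now kIemNativeGstSimdRegLdStSz_256.
   @endcode */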
6208
6209
6210static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
6211 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
6212{
6213    /* Easy case first: either the destination loads the same range as what the source has already loaded, or the source has loaded everything. */
6214 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
6215 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
6216 {
6217# ifdef RT_ARCH_ARM64
6218 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
6219 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
6220# endif
6221
6222 if (idxHstSimdRegDst != idxHstSimdRegSrc)
6223 {
6224 switch (enmLoadSzDst)
6225 {
6226 case kIemNativeGstSimdRegLdStSz_256:
6227 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6228 break;
6229 case kIemNativeGstSimdRegLdStSz_Low128:
6230 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6231 break;
6232 case kIemNativeGstSimdRegLdStSz_High128:
6233 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
6234 break;
6235 default:
6236 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6237 }
6238
6239 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
6240 }
6241 }
6242 else
6243 {
6244        /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
6245 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
6246 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
6247 }
6248
6249 return off;
6250}
6251
6252
6253/**
6254 * Allocates a temporary host SIMD register for keeping a guest
6255 * SIMD register value.
6256 *
6257 * Since we may already have a register holding the guest register value,
6258 * code will be emitted to do the loading if that's not the case. Code may also
6259 * be emitted if we have to free up a register to satisfy the request.
6260 *
6261 * @returns The host register number; throws VBox status code on failure, so no
6262 * need to check the return value.
6263 * @param pReNative The native recompile state.
6264 * @param poff Pointer to the variable with the code buffer
6265 *                      position. This will be updated if we need to move a
6266 * variable from register to stack in order to satisfy
6267 * the request.
6268 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
6269 * @param enmIntendedUse How the caller will be using the host register.
6270 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
6271 * register is okay (default). The ASSUMPTION here is
6272 * that the caller has already flushed all volatile
6273 * registers, so this is only applied if we allocate a
6274 * new register.
6275 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
6276 */
6277DECL_HIDDEN_THROW(uint8_t)
6278iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6279 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
6280 bool fNoVolatileRegs /*= false*/)
6281{
6282 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
6283#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
6284 AssertMsg( pReNative->idxCurCall == 0
6285 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6286 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
6287 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
6288 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
6289 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
6290 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
6291#endif
6292#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
6293 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
6294#endif
6295 uint32_t const fRegMask = !fNoVolatileRegs
6296 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
6297 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
6298
6299 /*
6300 * First check if the guest register value is already in a host register.
6301 */
6302 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
6303 {
6304 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
6305 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
6306 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
6307 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
6308
6309 /* It's not supposed to be allocated... */
6310 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
6311 {
6312 /*
6313 * If the register will trash the guest shadow copy, try find a
6314 * completely unused register we can use instead. If that fails,
6315 * we need to disassociate the host reg from the guest reg.
6316 */
6317 /** @todo would be nice to know if preserving the register is in any way helpful. */
6318 /* If the purpose is calculations, try duplicate the register value as
6319 we'll be clobbering the shadow. */
6320 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
6321 && ( ~pReNative->Core.bmHstSimdRegs
6322 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
6323 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
6324 {
6325 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
6326
6327 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6328
6329 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
6330 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6331 g_apszIemNativeHstSimdRegNames[idxRegNew]));
6332 idxSimdReg = idxRegNew;
6333 }
6334 /* If the current register matches the restrictions, go ahead and allocate
6335 it for the caller. */
6336 else if (fRegMask & RT_BIT_32(idxSimdReg))
6337 {
6338 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
6339 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
6340 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6341 {
6342 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6343 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
6344 else
6345 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
6346 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
6347 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6348 }
6349 else
6350 {
6351 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
6352 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
6353 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
6354 }
6355 }
6356 /* Otherwise, allocate a register that satisfies the caller and transfer
6357 the shadowing if compatible with the intended use. (This basically
6358               means the caller wants a non-volatile register (RSP push/pop scenario).) */
6359 else
6360 {
6361 Assert(fNoVolatileRegs);
6362 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
6363 !fNoVolatileRegs
6364 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
6365 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6366 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6367 {
6368 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
6369 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transfering %s to %s for guest %s %s\n",
6370 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
6371 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6372 }
6373 else
6374 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
6375 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6376 g_apszIemNativeHstSimdRegNames[idxRegNew]));
6377 idxSimdReg = idxRegNew;
6378 }
6379 }
6380 else
6381 {
6382 /*
6383 * Oops. Shadowed guest register already allocated!
6384 *
6385 * Allocate a new register, copy the value and, if updating, the
6386 * guest shadow copy assignment to the new register.
6387 */
6388 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
6389 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
6390 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
6391 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
6392
6393 /** @todo share register for readonly access. */
6394 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
6395 enmIntendedUse == kIemNativeGstRegUse_Calculation);
6396
6397 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6398 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
6399 else
6400 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6401
6402 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
6403 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6404 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
6405 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6406 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
6407 else
6408 {
6409 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
6410 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
6411 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
6412 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
6413 }
6414 idxSimdReg = idxRegNew;
6415 }
6416 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
6417
6418#ifdef VBOX_STRICT
6419 /* Strict builds: Check that the value is correct. */
6420 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6421 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
6422#endif
6423
6424 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6425 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
6426 {
6427# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6428 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
6429 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
6430# endif
6431
6432 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
6433 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6434 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6435 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6436 else
6437 {
6438 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
6439 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6440 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6441 }
6442 }
6443
6444 return idxSimdReg;
6445 }
6446
6447 /*
6448     * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
6449 */
6450 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
6451
6452 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
6453 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
6454 else
6455 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
6456
6457 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
6458 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
6459
6460 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
6461 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
6462 {
6463# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6464 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
6465 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
6466# endif
6467
6468 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
6469 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6470 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
6471 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6472 else
6473 {
6474 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
6475 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
6476 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
6477 }
6478 }
6479
6480 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
6481 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
6482
6483 return idxRegNew;
6484}
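/* Usage sketch (a minimal example, not taken from a real MC block): an emitter fetches a
   guest SIMD register for updating, modifies the host copy and releases it again; the
   dirty tracking above takes care of writing the value back later:
   @code
        uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
                                                                           IEMNATIVEGSTSIMDREG_SIMD(1), // ymm1, assuming index 1
                                                                           kIemNativeGstSimdRegLdStSz_Low128,
                                                                           kIemNativeGstRegUse_ForUpdate);
        // ... emit code modifying the low 128 bits of the host register ...
        iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
   @endcode */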
6485
6486
6487/**
6488 * Flushes guest SIMD register shadow copies held by a set of host registers.
6489 *
6490 * This is used whenever calling an external helper, to ensure that we don't carry on
6491 * with any guest shadows in volatile registers, as these will get corrupted by the callee.
6492 *
6493 * @param pReNative The native recompile state.
6494 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
6495 */
6496DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
6497{
6498 /*
6499 * Reduce the mask by what's currently shadowed.
6500 */
6501 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
6502 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
6503 if (fHstSimdRegs)
6504 {
6505 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
6506 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
6507 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
6508 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
6509 if (bmHstSimdRegsWithGstShadowNew)
6510 {
6511 /*
6512 * Partial (likely).
6513 */
6514 uint64_t fGstShadows = 0;
6515 do
6516 {
6517 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
6518 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
6519 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
6520 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
6521 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
6522 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
6523
6524 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
6525 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
6526 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
6527 } while (fHstSimdRegs != 0);
6528 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
6529 }
6530 else
6531 {
6532 /*
6533 * Clear all.
6534 */
6535 do
6536 {
6537 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
6538 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
6539 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
6540 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
6541 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
6542 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
6543
6544 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
6545 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
6546 } while (fHstSimdRegs != 0);
6547 pReNative->Core.bmGstSimdRegShadows = 0;
6548 }
6549 }
6550}
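/* Usage sketch: before emitting a call to an external helper, guest shadows held in
   call-volatile host SIMD registers would be dropped along these lines:
   @code
        iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
   @endcode */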
6551#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6552
6553
6554
6555/*********************************************************************************************************************************
6556* Code emitters for flushing pending guest register writes and sanity checks *
6557*********************************************************************************************************************************/
6558
6559#ifdef VBOX_STRICT
6560/**
6561 * Does internal register allocator sanity checks.
6562 */
6563DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6564{
6565 /*
6566 * Iterate host registers building a guest shadowing set.
6567 */
6568 uint64_t bmGstRegShadows = 0;
6569 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6570 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6571 while (bmHstRegsWithGstShadow)
6572 {
6573 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6574 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6575 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6576
6577 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6578 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6579 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6580 bmGstRegShadows |= fThisGstRegShadows;
6581 while (fThisGstRegShadows)
6582 {
6583 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6584 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6585 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6586 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6587 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6588 }
6589 }
6590 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6591 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6592 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6593
6594 /*
6595 * Now the other way around, checking the guest to host index array.
6596 */
6597 bmHstRegsWithGstShadow = 0;
6598 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6599 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6600 while (bmGstRegShadows)
6601 {
6602 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6603 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6604 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6605
6606 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6607 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6608 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6609 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6610 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6611 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6612 }
6613 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6614 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6615 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6616}
6617#endif /* VBOX_STRICT */
6618
6619
6620/**
6621 * Flushes any delayed guest register writes.
6622 *
6623 * This must be called prior to calling CImpl functions and any helpers that use
6624 * the guest state (like raising exceptions) and such.
6625 *
6626 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
6627 * the caller if it wishes to do so.
6628 */
6629DECL_HIDDEN_THROW(uint32_t)
6630iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
6631{
6632#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6633    if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
6634 off = iemNativeEmitPcWriteback(pReNative, off);
6635#else
6636 RT_NOREF(pReNative, fGstShwExcept);
6637#endif
6638
6639#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
6640 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
6641#endif
6642
6643#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6644 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
6645#endif
6646
6647 return off;
6648}
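/* Usage sketch: passing zero for both exception masks flushes everything; in practice this
   is normally reached through a wrapper rather than being called directly:
   @code
        off = iemNativeRegFlushPendingWritesSlow(pReNative, off, 0, 0); // no GPR or SIMD register is exempted
   @endcode */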
6649
6650
6651#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6652/**
6653 * Emits code to update the guest RIP value by adding the offset accumulated since the start of the last RIP update.
6654 */
6655DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6656{
6657 Assert(pReNative->Core.offPc);
6658# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6659 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6660 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
6661# endif
6662
6663# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6664 /* Allocate a temporary PC register. */
6665 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6666
6667 /* Perform the addition and store the result. */
6668 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6669 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6670
6671 /* Free but don't flush the PC register. */
6672 iemNativeRegFreeTmp(pReNative, idxPcReg);
6673# else
6674 /* Compare the shadow with the context value, they should match. */
6675 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6676 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6677# endif
6678
6679 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
6680 pReNative->Core.offPc = 0;
6681 pReNative->Core.cInstrPcUpdateSkipped = 0;
6682
6683 return off;
6684}
6685#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6686
6687
6688/*********************************************************************************************************************************
6689* Code Emitters (larger snippets) *
6690*********************************************************************************************************************************/
6691
6692/**
6693 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6694 * extending to 64-bit width.
6695 *
6696 * @returns New code buffer offset on success, UINT32_MAX on failure.
6697 * @param   pReNative   The native recompile state.
6698 * @param off The current code buffer position.
6699 * @param idxHstReg The host register to load the guest register value into.
6700 * @param enmGstReg The guest register to load.
6701 *
6702 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6703 * that is something the caller needs to do if applicable.
6704 */
6705DECL_HIDDEN_THROW(uint32_t)
6706iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6707{
6708 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6709 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6710
6711 switch (g_aGstShadowInfo[enmGstReg].cb)
6712 {
6713 case sizeof(uint64_t):
6714 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6715 case sizeof(uint32_t):
6716 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6717 case sizeof(uint16_t):
6718 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6719#if 0 /* not present in the table. */
6720 case sizeof(uint8_t):
6721 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6722#endif
6723 default:
6724 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6725 }
6726}
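/* Usage sketch (mirroring the strict-build value checks further down): load the current
   guest value into the fixed temporary GPR for a comparison, without touching the
   shadowing bookkeeping:
   @code
        off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
   @endcode */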
6727
6728
6729#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6730/**
6731 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6732 *
6733 * @returns New code buffer offset on success, UINT32_MAX on failure.
6734 * @param pReNative The recompiler state.
6735 * @param off The current code buffer position.
6736 * @param idxHstSimdReg The host register to load the guest register value into.
6737 * @param enmGstSimdReg The guest register to load.
6738 * @param enmLoadSz The load size of the register.
6739 *
6740 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6741 * that is something the caller needs to do if applicable.
6742 */
6743DECL_HIDDEN_THROW(uint32_t)
6744iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6745 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6746{
6747 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6748
6749 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6750 switch (enmLoadSz)
6751 {
6752 case kIemNativeGstSimdRegLdStSz_256:
6753 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6754 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6755 case kIemNativeGstSimdRegLdStSz_Low128:
6756 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6757 case kIemNativeGstSimdRegLdStSz_High128:
6758 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6759 default:
6760 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6761 }
6762}
6763#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6764
6765#ifdef VBOX_STRICT
6766
6767/**
6768 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6769 *
6770 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6771 * Trashes EFLAGS on AMD64.
6772 */
6773DECL_HIDDEN_THROW(uint32_t)
6774iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6775{
6776# ifdef RT_ARCH_AMD64
6777 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6778
6779 /* rol reg64, 32 */
6780 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6781 pbCodeBuf[off++] = 0xc1;
6782 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6783 pbCodeBuf[off++] = 32;
6784
6785 /* test reg32, ffffffffh */
6786 if (idxReg >= 8)
6787 pbCodeBuf[off++] = X86_OP_REX_B;
6788 pbCodeBuf[off++] = 0xf7;
6789 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6790 pbCodeBuf[off++] = 0xff;
6791 pbCodeBuf[off++] = 0xff;
6792 pbCodeBuf[off++] = 0xff;
6793 pbCodeBuf[off++] = 0xff;
6794
6795 /* je/jz +1 */
6796 pbCodeBuf[off++] = 0x74;
6797 pbCodeBuf[off++] = 0x01;
6798
6799 /* int3 */
6800 pbCodeBuf[off++] = 0xcc;
6801
6802 /* rol reg64, 32 */
6803 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6804 pbCodeBuf[off++] = 0xc1;
6805 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6806 pbCodeBuf[off++] = 32;
6807
6808# elif defined(RT_ARCH_ARM64)
6809 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6810 /* lsr tmp0, reg64, #32 */
6811 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6812 /* cbz tmp0, +1 */
6813 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6814 /* brk #0x1100 */
6815 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6816
6817# else
6818# error "Port me!"
6819# endif
6820 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6821 return off;
6822}
6823
6824
6825/**
6826 * Emitting code that checks that the content of register @a idxReg is the same
6827 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6828 * instruction if that's not the case.
6829 *
6830 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6831 * Trashes EFLAGS on AMD64.
6832 */
6833DECL_HIDDEN_THROW(uint32_t)
6834iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6835{
6836#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6837    /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6838 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6839 return off;
6840#endif
6841
6842# ifdef RT_ARCH_AMD64
6843 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6844
6845 /* cmp reg, [mem] */
6846 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6847 {
6848 if (idxReg >= 8)
6849 pbCodeBuf[off++] = X86_OP_REX_R;
6850 pbCodeBuf[off++] = 0x38;
6851 }
6852 else
6853 {
6854 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6855 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6856 else
6857 {
6858 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6859 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6860 else
6861 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6862 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6863 if (idxReg >= 8)
6864 pbCodeBuf[off++] = X86_OP_REX_R;
6865 }
6866 pbCodeBuf[off++] = 0x39;
6867 }
6868 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6869
6870 /* je/jz +1 */
6871 pbCodeBuf[off++] = 0x74;
6872 pbCodeBuf[off++] = 0x01;
6873
6874 /* int3 */
6875 pbCodeBuf[off++] = 0xcc;
6876
6877 /* For values smaller than the register size, we must check that the rest
6878 of the register is all zeros. */
6879 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6880 {
6881 /* test reg64, imm32 */
6882 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6883 pbCodeBuf[off++] = 0xf7;
6884 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6885 pbCodeBuf[off++] = 0;
6886 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6887 pbCodeBuf[off++] = 0xff;
6888 pbCodeBuf[off++] = 0xff;
6889
6890 /* je/jz +1 */
6891 pbCodeBuf[off++] = 0x74;
6892 pbCodeBuf[off++] = 0x01;
6893
6894 /* int3 */
6895 pbCodeBuf[off++] = 0xcc;
6896 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6897 }
6898 else
6899 {
6900 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6901 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6902 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6903 }
6904
6905# elif defined(RT_ARCH_ARM64)
6906 /* mov TMP0, [gstreg] */
6907 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6908
6909 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6910 /* sub tmp0, tmp0, idxReg */
6911 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6912 /* cbz tmp0, +1 */
6913 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6914 /* brk #0x1000+enmGstReg */
6915 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6916 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6917
6918# else
6919# error "Port me!"
6920# endif
6921 return off;
6922}
6923
6924
6925# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6926# ifdef RT_ARCH_AMD64
6927/**
6928 * Helper for AMD64 to emit code which checks the low 128-bits of the given SIMD register against the given vCPU offset.
6929 */
6930DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6931{
6932 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6933 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6934 if (idxSimdReg >= 8)
6935 pbCodeBuf[off++] = X86_OP_REX_R;
6936 pbCodeBuf[off++] = 0x0f;
6937 pbCodeBuf[off++] = 0x38;
6938 pbCodeBuf[off++] = 0x29;
6939 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6940
6941 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6942 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6943 pbCodeBuf[off++] = X86_OP_REX_W
6944 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6945 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6946 pbCodeBuf[off++] = 0x0f;
6947 pbCodeBuf[off++] = 0x3a;
6948 pbCodeBuf[off++] = 0x16;
6949 pbCodeBuf[off++] = 0xeb;
6950 pbCodeBuf[off++] = 0x00;
6951
6952 /* cmp tmp0, 0xffffffffffffffff. */
6953 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6954 pbCodeBuf[off++] = 0x83;
6955 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6956 pbCodeBuf[off++] = 0xff;
6957
6958 /* je/jz +1 */
6959 pbCodeBuf[off++] = 0x74;
6960 pbCodeBuf[off++] = 0x01;
6961
6962 /* int3 */
6963 pbCodeBuf[off++] = 0xcc;
6964
6965 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6966 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6967 pbCodeBuf[off++] = X86_OP_REX_W
6968 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6969 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6970 pbCodeBuf[off++] = 0x0f;
6971 pbCodeBuf[off++] = 0x3a;
6972 pbCodeBuf[off++] = 0x16;
6973 pbCodeBuf[off++] = 0xeb;
6974 pbCodeBuf[off++] = 0x01;
6975
6976 /* cmp tmp0, 0xffffffffffffffff. */
6977 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6978 pbCodeBuf[off++] = 0x83;
6979 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6980 pbCodeBuf[off++] = 0xff;
6981
6982 /* je/jz +1 */
6983 pbCodeBuf[off++] = 0x74;
6984 pbCodeBuf[off++] = 0x01;
6985
6986 /* int3 */
6987 pbCodeBuf[off++] = 0xcc;
6988
6989 return off;
6990}
6991# endif
6992
6993
6994/**
6995 * Emits code that checks that the content of SIMD register @a idxSimdReg is the same
6996 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6997 * instruction if that's not the case.
6998 *
6999 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
7000 * Trashes EFLAGS on AMD64.
7001 */
7002DECL_HIDDEN_THROW(uint32_t)
7003iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
7004 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
7005{
7006 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
7007 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
7008 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
7009 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
7010 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
7011 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
7012 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
7013 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
7014 return off;
7015
7016# ifdef RT_ARCH_AMD64
7017 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7018 {
7019 /* movdqa vectmp0, idxSimdReg */
7020 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
7021
7022 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
7023
7024 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7025 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
7026 }
7027
7028 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7029 {
7030 /* Due to the fact that CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
7031 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
7032
7033 /* vextracti128 vectmp0, idxSimdReg, 1 */
7034 pbCodeBuf[off++] = X86_OP_VEX3;
7035 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
7036 | X86_OP_VEX3_BYTE1_X
7037 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
7038 | 0x03; /* Opcode map */
7039 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
7040 pbCodeBuf[off++] = 0x39;
7041 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
7042 pbCodeBuf[off++] = 0x01;
7043
7044 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7045 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
7046 }
7047# elif defined(RT_ARCH_ARM64)
7048 /* mov vectmp0, [gstreg] */
7049 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
7050
7051 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7052 {
7053 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
7054 /* eor vectmp0, vectmp0, idxSimdReg */
7055 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
7056 /* uaddlv vectmp0, vectmp0.16B */
7057 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
7058 /* umov tmp0, vectmp0.H[0] */
7059 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
7060 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
7061 /* cbz tmp0, +1 */
7062 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
7063 /* brk #0x1000+enmGstReg */
7064 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
7065 }
7066
7067 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
7068 {
7069 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
7070 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
7071 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
7072 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
7073 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
7074 /* umov tmp0, (vectmp0 + 1).H[0] */
7075 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
7076 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
7077 /* cbz tmp0, +1 */
7078 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
7079 /* brk #0x1000+enmGstReg */
7080 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
7081 }
7082
7083# else
7084# error "Port me!"
7085# endif
7086
7087 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7088 return off;
7089}
7090# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
7091
7092
7093/**
7094 * Emits code that checks that IEMCPU::fExec matches @a fExec for all
7095 * important bits.
7096 *
7097 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
7098 * Trashes EFLAGS on AMD64.
7099 */
7100DECL_HIDDEN_THROW(uint32_t)
7101iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
7102{
7103 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
7104 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
7105 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
7106 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
7107
7108#ifdef RT_ARCH_AMD64
7109 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7110
7111 /* je/jz +1 */
7112 pbCodeBuf[off++] = 0x74;
7113 pbCodeBuf[off++] = 0x01;
7114
7115 /* int3 */
7116 pbCodeBuf[off++] = 0xcc;
7117
7118# elif defined(RT_ARCH_ARM64)
7119 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7120
7121 /* b.eq +1 */
7122 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
7123 /* brk #0x2000 */
7124 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
7125
7126# else
7127# error "Port me!"
7128# endif
7129 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7130
7131 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7132 return off;
7133}
7134
7135#endif /* VBOX_STRICT */
7136
7137
7138#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
7139/**
7140 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
7141 */
7142DECL_HIDDEN_THROW(uint32_t)
7143iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
7144{
7145 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
7146
7147 fEflNeeded &= X86_EFL_STATUS_BITS;
7148 if (fEflNeeded)
7149 {
7150# ifdef RT_ARCH_AMD64
7151 /* test dword [pVCpu + offVCpu], imm32 */
7152 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7153 if (fEflNeeded <= 0xff)
7154 {
7155 pCodeBuf[off++] = 0xf6;
7156 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
7157 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
7158 }
7159 else
7160 {
7161 pCodeBuf[off++] = 0xf7;
7162 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
7163 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
7164 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
7165 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
7166 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
7167 }
7168 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7169
7170# else
7171 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
7172 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
7173 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
7174# ifdef RT_ARCH_ARM64
7175 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
7176 off = iemNativeEmitBrk(pReNative, off, 0x7777);
7177# else
7178# error "Port me!"
7179# endif
7180 iemNativeRegFreeTmp(pReNative, idxRegTmp);
7181# endif
7182 }
7183 return off;
7184}
7185#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
7186
7187
7188/**
7189 * Emits code for checking the return code of a call and rcPassUp, returning
7190 * from the code if either is non-zero.
7191 */
7192DECL_HIDDEN_THROW(uint32_t)
7193iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7194{
7195#ifdef RT_ARCH_AMD64
7196 /*
7197 * AMD64: eax = call status code.
7198 */
7199
7200 /* edx = rcPassUp */
7201 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
7202# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7203 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
7204# endif
7205
7206 /* edx = eax | rcPassUp */
7207 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
7208 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
7209 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
7210 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7211
7212 /* Jump to non-zero status return path. */
7213 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
7214
7215 /* done. */
7216
7217#elif RT_ARCH_ARM64
7218 /*
7219 * ARM64: w0 = call status code.
7220 */
7221# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7222 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
7223# endif
7224 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
7225
7226 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
7227
7228 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
7229
7230 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7231 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
7232 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
7233
7234#else
7235# error "port me"
7236#endif
7237 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7238 RT_NOREF_PV(idxInstr);
7239 return off;
7240}
7241
7242
7243/**
7244 * Emits code to check if the content of @a idxAddrReg is a canonical address,
7245 * raising a \#GP(0) if it isn't.
7246 *
7247 * @returns New code buffer offset, UINT32_MAX on failure.
7248 * @param pReNative The native recompile state.
7249 * @param off The code buffer offset.
7250 * @param idxAddrReg The host register with the address to check.
7251 * @param idxInstr The current instruction.
7252 */
7253DECL_HIDDEN_THROW(uint32_t)
7254iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
7255{
7256 /*
7257 * Make sure we don't have any outstanding guest register writes as we may
7258 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
7259 */
7260 off = iemNativeRegFlushPendingWrites(pReNative, off);
7261
7262#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7263 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7264#else
7265 RT_NOREF(idxInstr);
7266#endif
7267
7268#ifdef RT_ARCH_AMD64
7269 /*
7270 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
7271 * return raisexcpt();
7272 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
7273 */
7274 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7275
7276 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
7277 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
7278 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
7279 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
7280 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7281
7282 iemNativeRegFreeTmp(pReNative, iTmpReg);
7283
7284#elif defined(RT_ARCH_ARM64)
7285 /*
7286 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
7287 * return raisexcpt();
7288 * ----
7289 * mov x1, 0x800000000000
7290 * add x1, x0, x1
7291 * cmp xzr, x1, lsr 48
7292 * b.ne .Lraisexcpt
7293 */
7294 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
7295
7296 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
7297 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
7298 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
7299 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7300
7301 iemNativeRegFreeTmp(pReNative, iTmpReg);
7302
7303#else
7304# error "Port me"
7305#endif
7306 return off;
7307}
7308
7309
7310/**
7311 * Emits code to check that the content of @a idxAddrReg is within the limit
7312 * of CS, raising a \#GP(0) if it isn't.
7313 *
7314 * @returns New code buffer offset; throws VBox status code on error.
7315 * @param pReNative The native recompile state.
7316 * @param off The code buffer offset.
7317 * @param idxAddrReg The host register (32-bit) with the address to
7318 * check.
7319 * @param idxInstr The current instruction.
7320 */
7321DECL_HIDDEN_THROW(uint32_t)
7322iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7323 uint8_t idxAddrReg, uint8_t idxInstr)
7324{
7325 /*
7326 * Make sure we don't have any outstanding guest register writes as we may
7327 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
7328 */
7329 off = iemNativeRegFlushPendingWrites(pReNative, off);
7330
7331#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7332 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7333#else
7334 RT_NOREF(idxInstr);
7335#endif
7336
7337 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
7338 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
7339 kIemNativeGstRegUse_ReadOnly);
7340
7341 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
7342 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
7343
7344 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
7345 return off;
7346}
7347
7348
7349/**
7350 * Emits a call to a CImpl function or something similar.
7351 */
7352DECL_HIDDEN_THROW(uint32_t)
7353iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
7354 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
7355{
7356 /* Writeback everything. */
7357 off = iemNativeRegFlushPendingWrites(pReNative, off);
7358
7359 /*
7360 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
7361 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
7362 */
7363 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
7364 fGstShwFlush
7365 | RT_BIT_64(kIemNativeGstReg_Pc)
7366 | RT_BIT_64(kIemNativeGstReg_EFlags));
7367 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
7368
7369 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
7370
7371 /*
7372 * Load the parameters.
7373 */
7374#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
7375 /* Special-case the hidden VBOXSTRICTRC pointer. */
7376 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7377 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
7378 if (cAddParams > 0)
7379 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
7380 if (cAddParams > 1)
7381 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
7382 if (cAddParams > 2)
7383 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
7384 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7385
7386#else
7387 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7388 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7389 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
7390 if (cAddParams > 0)
7391 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
7392 if (cAddParams > 1)
7393 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
7394 if (cAddParams > 2)
7395# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
7396 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
7397# else
7398 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
7399# endif
7400#endif
7401
7402 /*
7403 * Make the call.
7404 */
7405 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
7406
7407#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
7408 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7409#endif
7410
7411 /*
7412 * Check the status code.
7413 */
7414 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
7415}
7416
7417
7418/**
7419 * Emits a call to a threaded worker function.
7420 */
7421DECL_HIDDEN_THROW(uint32_t)
7422iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
7423{
7424 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7425
7426 /* We don't know what the threaded function is doing so we must flush all pending writes. */
7427 off = iemNativeRegFlushPendingWrites(pReNative, off);
7428
7429 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
7430 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
7431
7432#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7433 /* The threaded function may throw / long jmp, so set current instruction
7434 number if we're counting. */
7435 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7436#endif
7437
7438 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
7439
7440#ifdef RT_ARCH_AMD64
7441 /* Load the parameters and emit the call. */
7442# ifdef RT_OS_WINDOWS
7443# ifndef VBOXSTRICTRC_STRICT_ENABLED
7444 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7445 if (cParams > 0)
7446 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
7447 if (cParams > 1)
7448 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
7449 if (cParams > 2)
7450 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
7451# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
7452 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
7453 if (cParams > 0)
7454 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
7455 if (cParams > 1)
7456 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
7457 if (cParams > 2)
7458 {
7459 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
7460 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
7461 }
7462 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
7463# endif /* VBOXSTRICTRC_STRICT_ENABLED */
7464# else
7465 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7466 if (cParams > 0)
7467 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
7468 if (cParams > 1)
7469 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
7470 if (cParams > 2)
7471 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
7472# endif
7473
7474 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
7475
7476# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
7477 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
7478# endif
7479
7480#elif RT_ARCH_ARM64
7481 /*
7482 * ARM64:
7483 */
7484 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7485 if (cParams > 0)
7486 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
7487 if (cParams > 1)
7488 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
7489 if (cParams > 2)
7490 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
7491
7492 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
7493
7494#else
7495# error "port me"
7496#endif
7497
7498 /*
7499 * Check the status code.
7500 */
7501 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
7502
7503 return off;
7504}
7505
7506#ifdef VBOX_WITH_STATISTICS
7507/**
7508 * Emits code to update the thread call statistics.
7509 */
7510DECL_INLINE_THROW(uint32_t)
7511iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
7512{
7513 /*
7514 * Update threaded function stats.
7515 */
7516 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
7517 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
7518# if defined(RT_ARCH_ARM64)
7519 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
7520 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
7521 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
7522 iemNativeRegFreeTmp(pReNative, idxTmp1);
7523 iemNativeRegFreeTmp(pReNative, idxTmp2);
7524# else
7525 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
7526# endif
7527 return off;
7528}
7529#endif /* VBOX_WITH_STATISTICS */
7530
7531
7532/**
7533 * Emits the code at the ReturnWithFlags label (returns
7534 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7535 */
7536static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7537{
7538 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7539 if (idxLabel != UINT32_MAX)
7540 {
7541 iemNativeLabelDefine(pReNative, idxLabel, off);
7542
7543 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7544
7545 /* jump back to the return sequence. */
7546 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7547 }
7548 return off;
7549}
7550
7551
7552/**
7553 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7554 */
7555static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7556{
7557 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7558 if (idxLabel != UINT32_MAX)
7559 {
7560 iemNativeLabelDefine(pReNative, idxLabel, off);
7561
7562 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7563
7564 /* jump back to the return sequence. */
7565 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7566 }
7567 return off;
7568}
7569
7570
7571/**
7572 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7573 */
7574static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7575{
7576 /*
7577 * Generate the rc + rcPassUp fiddling code if needed.
7578 */
7579 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7580 if (idxLabel != UINT32_MAX)
7581 {
7582 iemNativeLabelDefine(pReNative, idxLabel, off);
7583
7584 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
7585#ifdef RT_ARCH_AMD64
7586# ifdef RT_OS_WINDOWS
7587# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7588 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7589# endif
7590 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7591 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7592# else
7593 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7594 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7595# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7596 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7597# endif
7598# endif
7599# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7600 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7601# endif
7602
7603#else
7604 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7605 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7606 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7607#endif
7608
7609 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7610 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7611 }
7612 return off;
7613}
7614
7615
7616/**
7617 * Emits a standard epilog.
7618 */
7619static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7620{
7621 *pidxReturnLabel = UINT32_MAX;
7622
7623 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7624 off = iemNativeRegFlushPendingWrites(pReNative, off);
7625
7626 /*
7627 * Successful return, so clear the return register (eax, w0).
7628 */
7629 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
7630
7631 /*
7632 * Define label for common return point.
7633 */
7634 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7635 *pidxReturnLabel = idxReturn;
7636
7637 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7638
7639 /*
7640 * Restore registers and return.
7641 */
7642#ifdef RT_ARCH_AMD64
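    /* The epilogue below undoes the prologue emitted by iemNativeEmitProlog, roughly:
            lea     rsp, [rbp + IEMNATIVE_FP_OFF_LAST_PUSH]   ; rbp - 5*8 (SysV) / rbp - 7*8 (Windows)
            pop     r15 / r14 / r13 / r12 (+ rdi / rsi on Windows) / rbx
            leave
            ret                                               ; followed by an int3 poison byte */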
7643 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7644
7645 /* Reposition rsp at the r15 restore point. */
7646 pbCodeBuf[off++] = X86_OP_REX_W;
7647 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7648 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7649 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7650
7651 /* Pop non-volatile registers and return */
7652 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7653 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7654 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7655 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7656 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7657 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7658 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7659 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7660# ifdef RT_OS_WINDOWS
7661 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7662 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7663# endif
7664 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7665 pbCodeBuf[off++] = 0xc9; /* leave */
7666 pbCodeBuf[off++] = 0xc3; /* ret */
7667 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7668
7669#elif RT_ARCH_ARM64
7670 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7671
7672 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7673 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7674 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7675 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7676 IEMNATIVE_FRAME_VAR_SIZE / 8);
7677 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7678 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7679 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7680 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7681 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7682 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7683 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7684 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7685 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7686 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7687 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7688 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7689
7690 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7691 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7692 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7693 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7694
7695 /* retab / ret */
7696# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7697 if (1)
7698 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7699 else
7700# endif
7701 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7702
7703#else
7704# error "port me"
7705#endif
7706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7707
7708 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7709}
7710
7711
7712/**
7713 * Emits a standard prolog.
7714 */
7715static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7716{
7717#ifdef RT_ARCH_AMD64
7718 /*
7719 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7720 * reserving 64 bytes for stack variables plus 4 non-register argument
7721 * slots. Fixed register assignment: xBX = pVCpu.
7722 *
7723 * Since we always do the same register spilling, we can use the same
7724 * unwind description for all the code.
7725 */
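    /* The resulting AMD64 prologue looks roughly like this (Windows takes pVCpu in rcx, the
       SysV ABI in rdi; rsi/rdi are only saved on Windows):
            push    rbp
            mov     rbp, rsp
            push    rbx
            mov     rbx, rcx/rdi                  ; rbx = pVCpu (IEMNATIVE_REG_FIXED_PVMCPU)
            push    rsi / rdi                     ; Windows only
            push    r12 / r13 / r14 / r15
            sub     rsp, <align + var + stack-arg + shadow-arg space> */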
7726 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7727 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7728 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7729 pbCodeBuf[off++] = 0x8b;
7730 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7731 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7732 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7733# ifdef RT_OS_WINDOWS
7734 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7735 pbCodeBuf[off++] = 0x8b;
7736 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7737 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7738 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7739# else
7740 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7741 pbCodeBuf[off++] = 0x8b;
7742 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7743# endif
7744 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7745 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7746 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7747 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7748 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7749 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7750 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7751 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7752
7753# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7754 /* Save the frame pointer. */
7755 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7756# endif
7757
7758 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7759 X86_GREG_xSP,
7760 IEMNATIVE_FRAME_ALIGN_SIZE
7761 + IEMNATIVE_FRAME_VAR_SIZE
7762 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7763 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7764 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7765 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7766 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7767
7768#elif RT_ARCH_ARM64
7769 /*
7770 * We set up a stack frame exactly like on x86, only we have to push the
7771 * return address ourselves here. We save all non-volatile registers.
7772 */
7773 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7774
7775 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been unable
7776 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7777 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
7778 * in any way conditional, so just emitting this instruction now and hoping for the best... */
7779 /* pacibsp */
7780 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7781# endif
7782
7783 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7784 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7785 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7786 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7787 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7788 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7789 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7790 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7791 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7792 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7793 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7794 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7795 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7796 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7797 /* Save the BP and LR (ret address) registers at the top of the frame. */
7798 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7799 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7800 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7801 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7802 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7803 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7804
7805 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7806 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7807
7808 /* mov r28, r0 */
7809 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7810 /* mov r27, r1 */
7811 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7812
7813# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7814 /* Save the frame pointer. */
7815 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7816 ARMV8_A64_REG_X2);
7817# endif
7818
7819#else
7820# error "port me"
7821#endif
7822 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7823 return off;
7824}
7825
7826
7827/*********************************************************************************************************************************
7828* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7829*********************************************************************************************************************************/
7830
7831/**
7832 * Internal work that allocates a variable with kind set to
7833 * kIemNativeVarKind_Invalid and no current stack allocation.
7834 *
7835 * The kind will either be set by the caller or later when the variable is first
7836 * assigned a value.
7837 *
7838 * @returns Unpacked index.
7839 * @internal
7840 */
7841static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7842{
7843 Assert(cbType > 0 && cbType <= 64);
7844 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7845 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7846 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7847 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7848 pReNative->Core.aVars[idxVar].cbVar = cbType;
7849 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7850 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7851 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7852 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7853 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7854 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7855 pReNative->Core.aVars[idxVar].u.uValue = 0;
7856#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7857 pReNative->Core.aVars[idxVar].fSimdReg = false;
7858#endif
7859 return idxVar;
7860}
7861
7862
7863/**
7864 * Internal work that allocates an argument variable w/o setting enmKind.
7865 *
7866 * @returns Unpacked index.
7867 * @internal
7868 */
7869static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7870{
7871 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7872 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7873 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7874
7875 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7876 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7877 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7878 return idxVar;
7879}
7880
7881
7882/**
7883 * Gets the stack slot for a stack variable, allocating one if necessary.
7884 *
7885 * Calling this function implies that the stack slot will contain a valid
7886 * variable value. The caller deals with any register currently assigned to the
7887 * variable, typically by spilling it into the stack slot.
7888 *
7889 * @returns The stack slot number.
7890 * @param pReNative The recompiler state.
7891 * @param idxVar The variable.
7892 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7893 */
7894DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7895{
7896 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7897 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7898 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7899
7900 /* Already got a slot? */
7901 uint8_t const idxStackSlot = pVar->idxStackSlot;
7902 if (idxStackSlot != UINT8_MAX)
7903 {
7904 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7905 return idxStackSlot;
7906 }
7907
7908 /*
7909 * A single slot is easy to allocate.
7910 * Allocate them from the top end, closest to BP, to reduce the displacement.
7911 */
7912 if (pVar->cbVar <= sizeof(uint64_t))
7913 {
7914 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7915 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7916 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7917 pVar->idxStackSlot = (uint8_t)iSlot;
7918 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7919 return (uint8_t)iSlot;
7920 }
7921
7922 /*
7923 * We need more than one stack slot.
7924 *
7925 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7926 */
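    /* Worked example: for cbVar = 32 bytes, ASMBitLastSetU32(32) = 6 gives fBitAlignMask = RT_BIT_32(2) - 1 = 3
       and fBitAllocMask = RT_BIT_32(4) - 1 = 0xf, so the loop below rounds the candidate slot index down to a
       multiple of four and claims four consecutive 8-byte slots. */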
7927 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7928 Assert(pVar->cbVar <= 64);
7929 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7930 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7931 uint32_t bmStack = pReNative->Core.bmStack;
7932 while (bmStack != UINT32_MAX)
7933 {
7934 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7935 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7936 iSlot = (iSlot - 1) & ~fBitAlignMask;
7937 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7938 {
7939 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7940 pVar->idxStackSlot = (uint8_t)iSlot;
7941 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7942 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7943 return (uint8_t)iSlot;
7944 }
7945
7946 bmStack |= (fBitAllocMask << iSlot);
7947 }
7948 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7949}
7950
7951
7952/**
7953 * Changes the variable to a stack variable.
7954 *
7955 * Currently this is only possible to do the first time the variable is used;
7956 * switching later can be implemented but isn't done.
7957 *
7958 * @param pReNative The recompiler state.
7959 * @param idxVar The variable.
7960 * @throws VERR_IEM_VAR_IPE_2
7961 */
7962DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7963{
7964 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7965 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7966 if (pVar->enmKind != kIemNativeVarKind_Stack)
7967 {
7968 /* We could in theory transition from immediate to stack as well, but it
7969 would involve the caller doing work storing the value on the stack. So,
7970 till that's required we only allow transition from invalid. */
7971 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7972 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7973 pVar->enmKind = kIemNativeVarKind_Stack;
7974
7975 /* Note! We don't allocate a stack slot here, that's only done when a
7976 slot is actually needed to hold a variable value. */
7977 }
7978}
7979
7980
7981/**
7982 * Sets the variable to a constant value.
7983 *
7984 * This does not require stack storage as we know the value and can always
7985 * reload it, unless of course it's referenced.
7986 *
7987 * @param pReNative The recompiler state.
7988 * @param idxVar The variable.
7989 * @param uValue The immediate value.
7990 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7991 */
7992DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7993{
7994 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7995 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7996 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7997 {
7998 /* Only simple transitions for now. */
7999 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8000 pVar->enmKind = kIemNativeVarKind_Immediate;
8001 }
8002 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8003
8004 pVar->u.uValue = uValue;
8005 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
8006 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
8007 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
8008}
8009
8010
8011/**
8012 * Sets the variable to a reference (pointer) to @a idxOtherVar.
8013 *
8014 * This does not require stack storage as we know the value and can always
8015 * reload it. Loading is postponed till needed.
8016 *
8017 * @param pReNative The recompiler state.
8018 * @param idxVar The variable. Unpacked.
8019 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
8020 *
8021 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
8022 * @internal
8023 */
8024static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
8025{
8026 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
8027 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
8028
8029 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
8030 {
8031 /* Only simple transitions for now. */
8032 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
8033 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8034 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
8035 }
8036 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8037
8038 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
8039
8040 /* Update the other variable, ensure it's a stack variable. */
8041 /** @todo handle variables with const values... that'll go boom now. */
8042 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
8043 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8044}
8045
8046
8047/**
8048 * Sets the variable to a reference (pointer) to a guest register reference.
8049 *
8050 * This does not require stack storage as we know the value and can always
8051 * reload it. Loading is postponed till needed.
8052 *
8053 * @param pReNative The recompiler state.
8054 * @param idxVar The variable.
8055 * @param enmRegClass The class guest registers to reference.
8056 * @param idxReg The register within @a enmRegClass to reference.
8057 *
8058 * @throws VERR_IEM_VAR_IPE_2
8059 */
8060DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
8061 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
8062{
8063 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8064 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8065
8066 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
8067 {
8068 /* Only simple transitions for now. */
8069 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8070 pVar->enmKind = kIemNativeVarKind_GstRegRef;
8071 }
8072 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
8073
8074 pVar->u.GstRegRef.enmClass = enmRegClass;
8075 pVar->u.GstRegRef.idx = idxReg;
8076}
8077
8078
8079DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
8080{
8081 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8082}
8083
8084
8085DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
8086{
8087 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
8088
8089 /* Since we're using a generic uint64_t value type, we must truncate it if
8090 the variable is smaller, otherwise we may end up with too large a value when
8091 scaling up an imm8 w/ sign-extension.
8092
8093 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
8094 in the bios, bx=1) when running on arm, because clang expects 16-bit
8095 register parameters to have bits 16 and up set to zero. Instead of
8096 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
8097 CF value in the result. */
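    /* Example: a sign-extended imm8 of -1 destined for a 16-bit argument arrives here as
       uValue = 0xffffffffffffffff; with cbType == sizeof(uint16_t) the switch below masks it
       down to 0xffff, which is what the 16-bit helpers expect. */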
8098 switch (cbType)
8099 {
8100 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8101 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8102 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8103 }
8104 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8105 return idxVar;
8106}
8107
8108
8109DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
8110{
8111 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
8112 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
8113 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
8114 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
8115 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
8116 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
8117
8118 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
8119 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
8120 return idxArgVar;
8121}
8122
8123
8124DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
8125{
8126 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8127 /* Don't set to stack now, leave that to the first use as for instance
8128 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
8129 return idxVar;
8130}
8131
8132
8133DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
8134{
8135 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8136
8137 /* Since we're using a generic uint64_t value type, we must truncate it if
8138 the variable is smaller, otherwise we may end up with too large a value when
8139 scaling up an imm8 w/ sign-extension. */
8140 switch (cbType)
8141 {
8142 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
8143 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
8144 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
8145 }
8146 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
8147 return idxVar;
8148}
8149
8150
8151DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
8152{
8153 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
8154 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8155
8156 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
8157 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
8158
8159 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
8160
8161 /* Truncate the value to this variables size. */
8162 switch (cbType)
8163 {
8164 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
8165 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
8166 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
8167 }
8168
8169 iemNativeVarRegisterRelease(pReNative, idxVarOther);
8170 iemNativeVarRegisterRelease(pReNative, idxVar);
8171 return idxVar;
8172}
8173
8174
8175/**
8176 * Makes sure variable @a idxVar has a register assigned to it and that it stays
8177 * fixed till we call iemNativeVarRegisterRelease.
8178 *
8179 * @returns The host register number.
8180 * @param pReNative The recompiler state.
8181 * @param idxVar The variable.
8182 * @param poff Pointer to the instruction buffer offset.
8183 * In case a register needs to be freed up or the value
8184 * loaded off the stack.
8185 * @param fInitialized Set if the variable must already have been initialized.
8186 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8187 * the case.
8188 * @param idxRegPref Preferred register number or UINT8_MAX.
8189 */
8190DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8191 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8192{
8193 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8194 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8195 Assert(pVar->cbVar <= 8);
8196 Assert(!pVar->fRegAcquired);
8197
8198 uint8_t idxReg = pVar->idxReg;
8199 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8200 {
8201 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8202 && pVar->enmKind < kIemNativeVarKind_End);
8203 pVar->fRegAcquired = true;
8204 return idxReg;
8205 }
8206
8207 /*
8208 * If the kind of variable has not yet been set, default to 'stack'.
8209 */
8210 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8211 && pVar->enmKind < kIemNativeVarKind_End);
8212 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8213 iemNativeVarSetKindToStack(pReNative, idxVar);
8214
8215 /*
8216 * We have to allocate a register for the variable, even if it's a stack one,
8217 * as we don't know if there are modifications being made to it before it's
8218 * finalized (todo: analyze and insert hints about that?).
8219 *
8220 * If we can, we try to get the correct register for argument variables. This
8221 * is assuming that most argument variables are fetched as close as possible
8222 * to the actual call, so that there aren't any interfering hidden calls
8223 * (memory accesses, etc) in between.
8224 *
8225 * If we cannot, or it's a plain (non-argument) variable, we make sure no
8226 * argument registers that will be used by this MC block are allocated here,
8227 * and we always prefer non-volatile registers to avoid needing to spill
8228 * stuff for internal calls.
8229 */
8230 /** @todo Detect too early argument value fetches and warn about hidden
8231 * calls causing less optimal code to be generated in the python script. */
8232
8233 uint8_t const uArgNo = pVar->uArgNo;
8234 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
8235 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
8236 {
8237 idxReg = g_aidxIemNativeCallRegs[uArgNo];
8238
8239#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8240 /* Writeback any dirty shadow registers we are about to unshadow. */
8241 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
8242#endif
8243
8244 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8245 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
8246 }
8247 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
8248 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
8249 {
8250 /** @todo there must be a better way for this and boot cArgsX? */
8251 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8252 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
8253 & ~pReNative->Core.bmHstRegsWithGstShadow
8254 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
8255 & fNotArgsMask;
8256 if (fRegs)
8257 {
8258 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
8259 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
8260 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
8261 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
8262 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
8263 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8264 }
8265 else
8266 {
8267 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8268 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
8269 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8270 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8271 }
8272 }
8273 else
8274 {
8275 idxReg = idxRegPref;
8276 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8277 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8278 }
8279 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8280 pVar->idxReg = idxReg;
8281
8282#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8283 pVar->fSimdReg = false;
8284#endif
8285
8286 /*
8287 * Load it off the stack if we've got a stack slot.
8288 */
8289 uint8_t const idxStackSlot = pVar->idxStackSlot;
8290 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8291 {
8292 Assert(fInitialized);
8293 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8294 switch (pVar->cbVar)
8295 {
8296 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
8297 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
8298 case 3: AssertFailed(); RT_FALL_THRU();
8299 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
8300 default: AssertFailed(); RT_FALL_THRU();
8301 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
8302 }
8303 }
8304 else
8305 {
8306 Assert(idxStackSlot == UINT8_MAX);
8307 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8308 }
8309 pVar->fRegAcquired = true;
8310 return idxReg;
8311}
8312
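/*
 * Illustrative usage sketch, not part of the original source: the typical
 * acquire/use/release bracket an emitter puts around a variable register.
 * The emitter name and the variable index are hypothetical, and the exact
 * iemNativeVarRegisterRelease() signature is assumed here.
 */
#if 0 /* documentation sketch only */
static uint32_t iemNativeEmitMyExampleOp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMyTmp)
{
    /* Pin the variable to a host register; the code buffer offset is passed by
       pointer because acquiring may emit spill/load instructions. */
    uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarMyTmp, &off);

    /* ... emit code operating on idxReg here ... */

    /* Unpin it so the allocator is free to reuse or spill the register again. */
    iemNativeVarRegisterRelease(pReNative, idxVarMyTmp); /* assumed signature */
    return off;
}
#endif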
8313
8314#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8315/**
8316 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
8317 * fixed till we call iemNativeVarRegisterRelease.
8318 *
8319 * @returns The host register number.
8320 * @param pReNative The recompiler state.
8321 * @param idxVar The variable.
8322 * @param poff Pointer to the instruction buffer offset.
8323 * In case a register needs to be freed up or the value
8324 * loaded off the stack.
8325 * @param fInitialized Set if the variable must already have been initialized.
8326 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
8327 * the case.
8328 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
8329 */
8330DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
8331 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
8332{
8333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8334 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8335 Assert( pVar->cbVar == sizeof(RTUINT128U)
8336 || pVar->cbVar == sizeof(RTUINT256U));
8337 Assert(!pVar->fRegAcquired);
8338
8339 uint8_t idxReg = pVar->idxReg;
8340 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
8341 {
8342 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
8343 && pVar->enmKind < kIemNativeVarKind_End);
8344 pVar->fRegAcquired = true;
8345 return idxReg;
8346 }
8347
8348 /*
8349 * If the kind of variable has not yet been set, default to 'stack'.
8350 */
8351 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
8352 && pVar->enmKind < kIemNativeVarKind_End);
8353 if (pVar->enmKind == kIemNativeVarKind_Invalid)
8354 iemNativeVarSetKindToStack(pReNative, idxVar);
8355
8356 /*
8357 * We have to allocate a register for the variable, even if it's a stack one,
8358 * as we don't know if there are modifications being made to it before it's
8359 * finalized (todo: analyze and insert hints about that?).
8360 *
8361 * If we can, we try to get the correct register for argument variables. This
8362 * assumes that most argument variables are fetched as close as possible
8363 * to the actual call, so that there aren't any interfering hidden calls
8364 * (memory accesses, etc.) in between.
8365 *
8366 * If we cannot, or it isn't an argument variable, we make sure no argument
8367 * registers that will be used by this MC block are allocated here, and we
8368 * always prefer non-volatile registers to avoid needing to spill stuff for
8369 * internal calls.
8370 */
8371 /** @todo Detect too-early argument value fetches in the Python script and warn
8372 * about hidden calls causing less optimal code to be generated. */
8373
8374 uint8_t const uArgNo = pVar->uArgNo;
8375 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
8376
8377 /* SIMD is a bit simpler for now because there is no support for arguments. */
8378 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
8379 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
8380 {
8381 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
8382 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
8383 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
8384 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
8385 & fNotArgsMask;
8386 if (fRegs)
8387 {
8388 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
8389 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
8390 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
8391 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
8392 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8393 }
8394 else
8395 {
8396 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
8397 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
8398 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
8399 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
8400 }
8401 }
8402 else
8403 {
8404 idxReg = idxRegPref;
8405 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
8406 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
8407 }
8408 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
8409
8410 pVar->fSimdReg = true;
8411 pVar->idxReg = idxReg;
8412
8413 /*
8414 * Load it off the stack if we've got a stack slot.
8415 */
8416 uint8_t const idxStackSlot = pVar->idxStackSlot;
8417 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8418 {
8419 Assert(fInitialized);
8420 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8421 switch (pVar->cbVar)
8422 {
8423 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
8424 default: AssertFailed(); RT_FALL_THRU();
8425 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
8426 }
8427 }
8428 else
8429 {
8430 Assert(idxStackSlot == UINT8_MAX);
8431 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8432 }
8433 pVar->fRegAcquired = true;
8434 return idxReg;
8435}
8436#endif
8437
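/*
 * Illustrative usage sketch, not part of the original source: acquiring a SIMD
 * register for an already initialized 128-bit variable (requires
 * IEMNATIVE_WITH_SIMD_REG_ALLOCATOR).  The emitter name and the variable index
 * are hypothetical, and the release call signature is assumed.
 */
#if 0 /* documentation sketch only */
static uint32_t iemNativeEmitMySimdExampleOp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarXmm)
{
    /* The variable must be sizeof(RTUINT128U) or sizeof(RTUINT256U); with
       fInitialized=true a spilled value is loaded off its stack slot. */
    uint8_t const idxSimdReg = iemNativeVarSimdRegisterAcquire(pReNative, idxVarXmm, &off,
                                                               true /*fInitialized*/, UINT8_MAX /*idxRegPref*/);

    /* ... emit SIMD code operating on idxSimdReg here ... */

    iemNativeVarRegisterRelease(pReNative, idxVarXmm); /* assumed signature */
    return off;
}
#endif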
8438
8439/**
8440 * The value of variable @a idxVar will be written in full to the @a enmGstReg
8441 * guest register.
8442 *
8443 * This function makes sure there is a register for it and sets it to be the
8444 * current shadow copy of @a enmGstReg.
8445 *
8446 * @returns The host register number.
8447 * @param pReNative The recompiler state.
8448 * @param idxVar The variable.
8449 * @param enmGstReg The guest register this variable will be written to
8450 * after this call.
8451 * @param poff Pointer to the instruction buffer offset.
8452 * In case a register needs to be freed up or if the
8453 * variable content needs to be loaded off the stack.
8454 *
8455 * @note We DO NOT expect @a idxVar to be an argument variable,
8456 * because this function can only be used in the commit stage of an
8457 * instruction.
8458 */
8459DECL_HIDDEN_THROW(uint8_t)
8460iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
8461{
8462 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8463 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8464 Assert(!pVar->fRegAcquired);
8465 AssertMsgStmt( pVar->cbVar <= 8
8466 && ( pVar->enmKind == kIemNativeVarKind_Immediate
8467 || pVar->enmKind == kIemNativeVarKind_Stack),
8468 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
8469 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
8470 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8471
8472 /*
8473 * This shouldn't ever be used for arguments, unless it's in a weird else
8474 * branch that doesn't do any calling and even then it's questionable.
8475 *
8476 * However, in case someone writes crazy wrong MC code and does register
8477 * updates before making calls, just use the regular register allocator to
8478 * ensure we get a register suitable for the intended argument number.
8479 */
8480 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
8481
8482 /*
8483 * If there is already a register for the variable, we transfer/set the
8484 * guest shadow copy assignment to it.
8485 */
8486 uint8_t idxReg = pVar->idxReg;
8487 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8488 {
8489#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8490 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
8491 {
8492# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8493 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
8494 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
8495# endif
8496 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
8497 }
8498#endif
8499
8500 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
8501 {
8502 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
8503 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
8504 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
8505 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
8506 }
8507 else
8508 {
8509 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
8510 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
8511 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
8512 }
8513 /** @todo figure this one out. We need some way of making sure the register isn't
8514 * modified after this point, just in case we start writing crappy MC code. */
8515 pVar->enmGstReg = enmGstReg;
8516 pVar->fRegAcquired = true;
8517 return idxReg;
8518 }
8519 Assert(pVar->uArgNo == UINT8_MAX);
8520
8521 /*
8522 * Because this is supposed to be the commit stage, we just tag along with the
8523 * temporary register allocator and upgrade it to a variable register.
8524 */
8525 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
8526 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
8527 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
8528 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
8529 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
8530 pVar->idxReg = idxReg;
8531
8532 /*
8533 * Now we need to load the register value.
8534 */
8535 if (pVar->enmKind == kIemNativeVarKind_Immediate)
8536 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
8537 else
8538 {
8539 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8540 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
8541 switch (pVar->cbVar)
8542 {
8543 case sizeof(uint64_t):
8544 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
8545 break;
8546 case sizeof(uint32_t):
8547 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
8548 break;
8549 case sizeof(uint16_t):
8550 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
8551 break;
8552 case sizeof(uint8_t):
8553 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
8554 break;
8555 default:
8556 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
8557 }
8558 }
8559
8560 pVar->fRegAcquired = true;
8561 return idxReg;
8562}
8563
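/*
 * Illustrative usage sketch, not part of the original source: committing a
 * (non-argument) stack or immediate variable to a guest GPR during the commit
 * stage of an instruction.  The emitter name, the variable index and the use
 * of kIemNativeGstReg_GprFirst as the RAX member are assumptions.
 */
#if 0 /* documentation sketch only */
static uint32_t iemNativeEmitMyCommitExampleOp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarResult)
{
    /* Get a host register holding the variable value and make it the current
       shadow copy of the guest register we are about to overwrite in full. */
    uint8_t const idxReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarResult,
                                                                  kIemNativeGstReg_GprFirst /* assumed: RAX */, &off);
    /* ... the guest register is then written back to CPUMCTX either here or
       via the delayed writeback machinery, depending on configuration ... */
    RT_NOREF(idxReg);
    return off;
}
#endif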
8564
8565/**
8566 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
8567 *
8568 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
8569 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
8570 * requirement of flushing anything in volatile host registers when making a
8571 * call.
8572 *
8573 * @returns New @a off value.
8574 * @param pReNative The recompiler state.
8575 * @param off The code buffer position.
8576 * @param fHstRegsNotToSave Set of registers not to save & restore.
8577 */
8578DECL_HIDDEN_THROW(uint32_t)
8579iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8580{
8581 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8582 if (fHstRegs)
8583 {
8584 do
8585 {
8586 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8587 fHstRegs &= ~RT_BIT_32(idxHstReg);
8588
8589 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8590 {
8591 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8592 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8593 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8594 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8595 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8596 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8597 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8598 {
8599 case kIemNativeVarKind_Stack:
8600 {
8601 /* Temporarily spill the variable register. */
8602 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8603 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8604 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8605 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8606 continue;
8607 }
8608
8609 case kIemNativeVarKind_Immediate:
8610 case kIemNativeVarKind_VarRef:
8611 case kIemNativeVarKind_GstRegRef:
8612 /* It is weird to have any of these loaded at this point. */
8613 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8614 continue;
8615
8616 case kIemNativeVarKind_End:
8617 case kIemNativeVarKind_Invalid:
8618 break;
8619 }
8620 AssertFailed();
8621 }
8622 else
8623 {
8624 /*
8625 * Allocate a temporary stack slot and spill the register to it.
8626 */
8627 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
8628 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
8629 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
8630 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
8631 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
8632 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8633 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8634 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8635 }
8636 } while (fHstRegs);
8637 }
8638#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8639
8640 /*
8641 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
8642 * which would be more difficult due to spanning multiple stack slots and different sizes
8643 * (besides, we only have a limited number of slots at the moment).
8644 *
8645 * However, the shadows need to be flushed out as the guest SIMD register might get corrupted by
8646 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
8647 */
8648 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
8649
8650 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8651 if (fHstRegs)
8652 {
8653 do
8654 {
8655 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8656 fHstRegs &= ~RT_BIT_32(idxHstReg);
8657
8658 /* Fixed reserved and temporary registers don't need saving. */
8659 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
8660 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
8661 continue;
8662
8663 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8664
8665 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8666 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8667 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8668 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8669 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8670 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8671 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8672 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8673 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8674 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8675 {
8676 case kIemNativeVarKind_Stack:
8677 {
8678 /* Temporarily spill the variable register. */
8679 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8680 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8681 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8682 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8683 if (cbVar == sizeof(RTUINT128U))
8684 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8685 else
8686 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8687 continue;
8688 }
8689
8690 case kIemNativeVarKind_Immediate:
8691 case kIemNativeVarKind_VarRef:
8692 case kIemNativeVarKind_GstRegRef:
8693 /* It is weird to have any of these loaded at this point. */
8694 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8695 continue;
8696
8697 case kIemNativeVarKind_End:
8698 case kIemNativeVarKind_Invalid:
8699 break;
8700 }
8701 AssertFailed();
8702 } while (fHstRegs);
8703 }
8704#endif
8705 return off;
8706}
8707
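/*
 * Illustrative usage sketch, not part of the original source: how the save and
 * restore helpers bracket a TLB-miss style helper call (the restore counterpart
 * is defined right below).  The emitter name and the elided argument loading
 * and call emission are hypothetical.
 */
#if 0 /* documentation sketch only */
static uint32_t iemNativeEmitMyTlbMissHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Spill volatile host registers that currently hold variables or temporaries. */
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);

    /* ... load the helper arguments and emit the actual call here ... */

    /* Reload everything that was spilled above. */
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, 0 /*fHstRegsNotToSave*/);
    return off;
}
#endif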
8708
8709/**
8710 * Emit code to restore volatile registers after a call to a helper.
8711 *
8712 * @returns New @a off value.
8713 * @param pReNative The recompiler state.
8714 * @param off The code buffer position.
8715 * @param fHstRegsNotToSave Set of registers not to save & restore.
8716 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8717 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8718 */
8719DECL_HIDDEN_THROW(uint32_t)
8720iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
8721{
8722 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
8723 if (fHstRegs)
8724 {
8725 do
8726 {
8727 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8728 fHstRegs &= ~RT_BIT_32(idxHstReg);
8729
8730 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8731 {
8732 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8733 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8734 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8735 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8736 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8737 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8738 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8739 {
8740 case kIemNativeVarKind_Stack:
8741 {
8742 /* Unspill the variable register. */
8743 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8744 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8745 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8746 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8747 continue;
8748 }
8749
8750 case kIemNativeVarKind_Immediate:
8751 case kIemNativeVarKind_VarRef:
8752 case kIemNativeVarKind_GstRegRef:
8753 /* It is weird to have any of these loaded at this point. */
8754 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8755 continue;
8756
8757 case kIemNativeVarKind_End:
8758 case kIemNativeVarKind_Invalid:
8759 break;
8760 }
8761 AssertFailed();
8762 }
8763 else
8764 {
8765 /*
8766 * Restore from temporary stack slot.
8767 */
8768 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8769 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8770 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8771 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8772
8773 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8774 }
8775 } while (fHstRegs);
8776 }
8777#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8778 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
8779 if (fHstRegs)
8780 {
8781 do
8782 {
8783 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8784 fHstRegs &= ~RT_BIT_32(idxHstReg);
8785
8786 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8787 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8788 continue;
8789 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8790
8791 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8792 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8793 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8794 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8795 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8796 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8797 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8798 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8799 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8800 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8801 {
8802 case kIemNativeVarKind_Stack:
8803 {
8804 /* Unspill the variable register. */
8805 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8806 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8807 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8808 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8809
8810 if (cbVar == sizeof(RTUINT128U))
8811 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8812 else
8813 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8814 continue;
8815 }
8816
8817 case kIemNativeVarKind_Immediate:
8818 case kIemNativeVarKind_VarRef:
8819 case kIemNativeVarKind_GstRegRef:
8820 /* It is weird to have any of these loaded at this point. */
8821 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8822 continue;
8823
8824 case kIemNativeVarKind_End:
8825 case kIemNativeVarKind_Invalid:
8826 break;
8827 }
8828 AssertFailed();
8829 } while (fHstRegs);
8830 }
8831#endif
8832 return off;
8833}
8834
8835
8836/**
8837 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8838 *
8839 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8840 *
8841 * ASSUMES that @a idxVar is valid and unpacked.
8842 */
8843DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8844{
8845 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8846 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8847 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8848 {
8849 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8850 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8851 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
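/* Example: a 32 byte (RTUINT256U) variable gives cSlots=4 and fAllocMask=0xf,
   i.e. four consecutive 8-byte stack slots starting at idxStackSlot. */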
8852 Assert(cSlots > 0);
8853 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8854 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8855 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8856 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8857 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8858 }
8859 else
8860 Assert(idxStackSlot == UINT8_MAX);
8861}
8862
8863
8864/**
8865 * Worker that frees a single variable.
8866 *
8867 * ASSUMES that @a idxVar is valid and unpacked.
8868 */
8869DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8870{
8871 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8872 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8873 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8874
8875 /* Free the host register first if any assigned. */
8876 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8877#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8878 if ( idxHstReg != UINT8_MAX
8879 && pReNative->Core.aVars[idxVar].fSimdReg)
8880 {
8881 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8882 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8883 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8884 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8885 }
8886 else
8887#endif
8888 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8889 {
8890 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8891 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8892 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8893 }
8894
8895 /* Free argument mapping. */
8896 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8897 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8898 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8899
8900 /* Free the stack slots. */
8901 iemNativeVarFreeStackSlots(pReNative, idxVar);
8902
8903 /* Free the actual variable. */
8904 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8905 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8906}
8907
8908
8909/**
8910 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8911 */
8912DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8913{
8914 while (bmVars != 0)
8915 {
8916 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8917 bmVars &= ~RT_BIT_32(idxVar);
8918
8919#if 1 /** @todo optimize by simplifying this later... */
8920 iemNativeVarFreeOneWorker(pReNative, idxVar);
8921#else
8922 /* Only need to free the host register, the rest is done as bulk updates below. */
8923 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8924 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8925 {
8926 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8927 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8928 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8929 }
8930#endif
8931 }
8932#if 0 /** @todo optimize by simplifying this later... */
8933 pReNative->Core.bmVars = 0;
8934 pReNative->Core.bmStack = 0;
8935 pReNative->Core.u64ArgVars = UINT64_MAX;
8936#endif
8937}
8938
8939
8940
8941/*********************************************************************************************************************************
8942* Emitters for IEM_MC_CALL_CIMPL_XXX *
8943*********************************************************************************************************************************/
8944
8945/**
8946 * Emits code to load a reference to the given guest register into @a idxGprDst.
8947 */
8948DECL_HIDDEN_THROW(uint32_t)
8949iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8950 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8951{
8952#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8953 /** @todo If we're ever going to allow referencing the RIP register we need to update the guest value here. */
8954#endif
8955
8956 /*
8957 * Get the offset relative to the CPUMCTX structure.
8958 */
8959 uint32_t offCpumCtx;
8960 switch (enmClass)
8961 {
8962 case kIemNativeGstRegRef_Gpr:
8963 Assert(idxRegInClass < 16);
8964 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8965 break;
8966
8967 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8968 Assert(idxRegInClass < 4);
8969 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8970 break;
8971
8972 case kIemNativeGstRegRef_EFlags:
8973 Assert(idxRegInClass == 0);
8974 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8975 break;
8976
8977 case kIemNativeGstRegRef_MxCsr:
8978 Assert(idxRegInClass == 0);
8979 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8980 break;
8981
8982 case kIemNativeGstRegRef_FpuReg:
8983 Assert(idxRegInClass < 8);
8984 AssertFailed(); /** @todo what kind of indexing? */
8985 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8986 break;
8987
8988 case kIemNativeGstRegRef_MReg:
8989 Assert(idxRegInClass < 8);
8990 AssertFailed(); /** @todo what kind of indexing? */
8991 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8992 break;
8993
8994 case kIemNativeGstRegRef_XReg:
8995 Assert(idxRegInClass < 16);
8996 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8997 break;
8998
8999 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
9000 Assert(idxRegInClass == 0);
9001 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
9002 break;
9003
9004 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
9005 Assert(idxRegInClass == 0);
9006 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
9007 break;
9008
9009 default:
9010 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
9011 }
9012
9013 /*
9014 * Load the value into the destination register.
9015 */
9016#ifdef RT_ARCH_AMD64
9017 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
9018
9019#elif defined(RT_ARCH_ARM64)
9020 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9021 Assert(offCpumCtx < 4096);
9022 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
9023
9024#else
9025# error "Port me!"
9026#endif
9027
9028 return off;
9029}
9030
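/*
 * Illustrative usage sketch, not part of the original source: loading the
 * address of a guest GPR (class Gpr, index 3 = RBX) into the first call
 * argument register so it can be passed by reference to a helper.  The
 * surrounding emitter context is hypothetical.
 */
#if 0 /* documentation sketch only */
off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
                                     kIemNativeGstRegRef_Gpr, 3 /*idxRegInClass: RBX*/);
#endif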
9031
9032/**
9033 * Common code for CIMPL and AIMPL calls.
9034 *
9035 * These are calls that use argument variables and such. They should not be
9036 * confused with internal calls required to implement an MC operation,
9037 * like a TLB load and similar.
9038 *
9039 * Upon return all that is left to do is to load any hidden arguments and
9040 * perform the call. All argument variables are freed.
9041 *
9042 * @returns New code buffer offset; throws VBox status code on error.
9043 * @param pReNative The native recompile state.
9044 * @param off The code buffer offset.
9045 * @param cArgs The total number of arguments (includes hidden
9046 * count).
9047 * @param cHiddenArgs The number of hidden arguments. The hidden
9048 * arguments must not have any variable declared for
9049 * them, whereas all the regular arguments must
9050 * (tstIEMCheckMc ensures this).
9051 * @param fFlushPendingWrites Flag whether to flush pending writes (default true);
9052 * pending writes in call-volatile registers are still flushed even when false.
9053 */
9054DECL_HIDDEN_THROW(uint32_t)
9055iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
9056 bool fFlushPendingWrites /*= true*/)
9057{
9058#ifdef VBOX_STRICT
9059 /*
9060 * Assert sanity.
9061 */
9062 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
9063 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
9064 for (unsigned i = 0; i < cHiddenArgs; i++)
9065 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
9066 for (unsigned i = cHiddenArgs; i < cArgs; i++)
9067 {
9068 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
9069 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
9070 }
9071 iemNativeRegAssertSanity(pReNative);
9072#endif
9073
9074 /* We don't know what the called function makes use of, so flush any pending register writes. */
9075 RT_NOREF(fFlushPendingWrites);
9076#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9077 if (fFlushPendingWrites)
9078#endif
9079 off = iemNativeRegFlushPendingWrites(pReNative, off);
9080
9081 /*
9082 * Before we do anything else, go over variables that are referenced and
9083 * make sure they are not in a register.
9084 */
9085 uint32_t bmVars = pReNative->Core.bmVars;
9086 if (bmVars)
9087 {
9088 do
9089 {
9090 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9091 bmVars &= ~RT_BIT_32(idxVar);
9092
9093 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
9094 {
9095 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
9096#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9097 if ( idxRegOld != UINT8_MAX
9098 && pReNative->Core.aVars[idxVar].fSimdReg)
9099 {
9100 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9101 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
9102
9103 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
9104 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
9105 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
9106 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9107 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
9108 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9109 else
9110 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9111
9112 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
9113 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
9114
9115 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9116 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
9117 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9118 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
9119 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
9120 }
9121 else
9122#endif
9123 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
9124 {
9125 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
9126 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
9127 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
9128 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9129 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
9130
9131 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9132 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
9133 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
9134 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
9135 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
9136 }
9137 }
9138 } while (bmVars != 0);
9139#if 0 //def VBOX_STRICT
9140 iemNativeRegAssertSanity(pReNative);
9141#endif
9142 }
9143
9144 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
9145
9146#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9147 /*
9148 * As the very first step, go over the host registers that will be used for arguments
9149 * and make sure they don't shadow anything which needs writing back first.
9150 */
9151 for (uint32_t i = 0; i < cRegArgs; i++)
9152 {
9153 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9154
9155 /* Writeback any dirty guest shadows before using this register. */
9156 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
9157 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
9158 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
9159 }
9160#endif
9161
9162 /*
9163 * First, go over the host registers that will be used for arguments and make
9164 * sure they either hold the desired argument or are free.
9165 */
9166 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
9167 {
9168 for (uint32_t i = 0; i < cRegArgs; i++)
9169 {
9170 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9171 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9172 {
9173 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
9174 {
9175 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
9176 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9177 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9178 Assert(pVar->idxReg == idxArgReg);
9179 uint8_t const uArgNo = pVar->uArgNo;
9180 if (uArgNo == i)
9181 { /* perfect */ }
9182 /* The variable allocator logic should make sure this is impossible,
9183 except for when the return register is used as a parameter (ARM,
9184 but not x86). */
9185#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
9186 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
9187 {
9188# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9189# error "Implement this"
9190# endif
9191 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
9192 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
9193 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
9194 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9195 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
9196 }
9197#endif
9198 else
9199 {
9200 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
9201
9202 if (pVar->enmKind == kIemNativeVarKind_Stack)
9203 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
9204 else
9205 {
9206 /* just free it, can be reloaded if used again */
9207 pVar->idxReg = UINT8_MAX;
9208 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
9209 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
9210 }
9211 }
9212 }
9213 else
9214 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
9215 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
9216 }
9217 }
9218#if 0 //def VBOX_STRICT
9219 iemNativeRegAssertSanity(pReNative);
9220#endif
9221 }
9222
9223 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
9224
9225#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
9226 /*
9227 * If there are any stack arguments, make sure they are in their place as well.
9228 *
9229 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
9230 * the caller) will be loading it later and it must be free (see the first loop).
9231 */
9232 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
9233 {
9234 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
9235 {
9236 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9237 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
9238 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9239 {
9240 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
9241 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
9242 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
9243 pVar->idxReg = UINT8_MAX;
9244 }
9245 else
9246 {
9247 /* Use ARG0 as temp for stuff we need registers for. */
9248 switch (pVar->enmKind)
9249 {
9250 case kIemNativeVarKind_Stack:
9251 {
9252 uint8_t const idxStackSlot = pVar->idxStackSlot;
9253 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9254 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
9255 iemNativeStackCalcBpDisp(idxStackSlot));
9256 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9257 continue;
9258 }
9259
9260 case kIemNativeVarKind_Immediate:
9261 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
9262 continue;
9263
9264 case kIemNativeVarKind_VarRef:
9265 {
9266 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9267 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9268 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9269 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9270 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9271# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9272 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
9273 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
9274 if ( fSimdReg
9275 && idxRegOther != UINT8_MAX)
9276 {
9277 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9278 if (cbVar == sizeof(RTUINT128U))
9279 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
9280 else
9281 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
9282 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9283 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9284 }
9285 else
9286# endif
9287 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9288 {
9289 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9290 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9291 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9292 }
9293 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9294 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9295 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
9296 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9297 continue;
9298 }
9299
9300 case kIemNativeVarKind_GstRegRef:
9301 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
9302 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9303 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
9304 continue;
9305
9306 case kIemNativeVarKind_Invalid:
9307 case kIemNativeVarKind_End:
9308 break;
9309 }
9310 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9311 }
9312 }
9313# if 0 //def VBOX_STRICT
9314 iemNativeRegAssertSanity(pReNative);
9315# endif
9316 }
9317#else
9318 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
9319#endif
9320
9321 /*
9322 * Make sure the argument variables are loaded into their respective registers.
9323 *
9324 * We can optimize this by ASSUMING that any register allocations are for
9325 * registers that have already been loaded and are ready. The previous step
9326 * saw to that.
9327 */
9328 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
9329 {
9330 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9331 {
9332 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
9333 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
9334 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
9335 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
9336 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
9337 else
9338 {
9339 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
9340 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9341 {
9342 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9343 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
9344 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
9345 | RT_BIT_32(idxArgReg);
9346 pVar->idxReg = idxArgReg;
9347 }
9348 else
9349 {
9350 /* Use ARG0 as temp for stuff we need registers for. */
9351 switch (pVar->enmKind)
9352 {
9353 case kIemNativeVarKind_Stack:
9354 {
9355 uint8_t const idxStackSlot = pVar->idxStackSlot;
9356 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9357 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
9358 continue;
9359 }
9360
9361 case kIemNativeVarKind_Immediate:
9362 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
9363 continue;
9364
9365 case kIemNativeVarKind_VarRef:
9366 {
9367 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
9368 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
9369 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
9370 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9371 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
9372 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
9373#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9374 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
9375 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
9376 if ( fSimdReg
9377 && idxRegOther != UINT8_MAX)
9378 {
9379 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
9380 if (cbVar == sizeof(RTUINT128U))
9381 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
9382 else
9383 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
9384 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9385 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9386 }
9387 else
9388#endif
9389 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
9390 {
9391 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
9392 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
9393 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9394 }
9395 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
9396 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
9397 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
9398 continue;
9399 }
9400
9401 case kIemNativeVarKind_GstRegRef:
9402 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
9403 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
9404 continue;
9405
9406 case kIemNativeVarKind_Invalid:
9407 case kIemNativeVarKind_End:
9408 break;
9409 }
9410 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
9411 }
9412 }
9413 }
9414#if 0 //def VBOX_STRICT
9415 iemNativeRegAssertSanity(pReNative);
9416#endif
9417 }
9418#ifdef VBOX_STRICT
9419 else
9420 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
9421 {
9422 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
9423 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
9424 }
9425#endif
9426
9427 /*
9428 * Free all argument variables (simplified).
9429 * Their lifetime always expires with the call they are for.
9430 */
9431 /** @todo Make the python script check that arguments aren't used after
9432 * IEM_MC_CALL_XXXX. */
9433 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
9434 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
9435 * an argument value. There is also some FPU stuff. */
9436 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
9437 {
9438 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
9439 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
9440
9441 /* no need to free registers: */
9442 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
9443 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
9444 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
9445 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
9446 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
9447 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
9448
9449 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
9450 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9451 iemNativeVarFreeStackSlots(pReNative, idxVar);
9452 }
9453 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9454
9455 /*
9456 * Flush volatile registers as we make the call.
9457 */
9458 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
9459
9460 return off;
9461}
9462
9463
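/*
 * Illustrative usage sketch, not part of the original source: the division of
 * labour between iemNativeEmitCallCommon and its caller for a call with one
 * hidden argument.  The hidden-argument source register name and the final
 * call emission are assumptions / elided.
 */
#if 0 /* documentation sketch only */
/* Spill, shuffle and load the regular argument variables into the right registers: */
off = iemNativeEmitCallCommon(pReNative, off, 3 /*cArgs*/, 1 /*cHiddenArgs*/);
/* The caller then loads the hidden argument(s), e.g. pVCpu into ARG0 ... */
off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU /* assumed name */);
/* ... and finally emits the actual call (emitter not shown here). */
#endif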
9464
9465/*********************************************************************************************************************************
9466* TLB Lookup. *
9467*********************************************************************************************************************************/
9468
9469/**
9470 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
9471 */
9472DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
9473{
9474 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
9475 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
9476 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
9477 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
9478
9479 /* Do the lookup manually. */
9480 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
9481 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
9482 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
9483 if (RT_LIKELY(pTlbe->uTag == uTag))
9484 {
9485 /*
9486 * Check TLB page table level access flags.
9487 */
9488 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
9489 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
9490 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
9491 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
9492 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
9493 | IEMTLBE_F_PG_UNASSIGNED
9494 | IEMTLBE_F_PT_NO_ACCESSED
9495 | fNoWriteNoDirty | fNoUser);
9496 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
9497 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
9498 {
9499 /*
9500 * Return the address.
9501 */
9502 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
9503 if ((uintptr_t)pbAddr == uResult)
9504 return;
9505 RT_NOREF(cbMem);
9506 AssertFailed();
9507 }
9508 else
9509 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
9510 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
9511 }
9512 else
9513 AssertFailed();
9514 RT_BREAKPOINT();
9515}
9516
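/*
 * Illustrative sketch, not part of the original source: given the decoding
 * above, a (hypothetical) caller packs uSegAndSizeAndAccess as byte 0 = segment
 * register index, byte 1 = access size, bits 16 and up = IEM_ACCESS_XXX flags.
 */
#if 0 /* documentation sketch only */
uint32_t const uSegAndSizeAndAccess = (uint32_t)iSegReg | ((uint32_t)cbMem << 8) | (fAccess << 16);
#endif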
9517/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
9518
9519
9520
9521/*********************************************************************************************************************************
9522* Recompiler Core. *
9523*********************************************************************************************************************************/
9524
9525/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
9526static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
9527{
9528 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
9529 pDis->cbCachedInstr += cbMaxRead;
9530 RT_NOREF(cbMinRead);
9531 return VERR_NO_DATA;
9532}
9533
9534
9535DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
9536{
9537 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
9538 {
9539#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
9540 ENTRY(fLocalForcedActions),
9541 ENTRY(iem.s.rcPassUp),
9542 ENTRY(iem.s.fExec),
9543 ENTRY(iem.s.pbInstrBuf),
9544 ENTRY(iem.s.uInstrBufPc),
9545 ENTRY(iem.s.GCPhysInstrBuf),
9546 ENTRY(iem.s.cbInstrBufTotal),
9547 ENTRY(iem.s.idxTbCurInstr),
9548#ifdef VBOX_WITH_STATISTICS
9549 ENTRY(iem.s.StatNativeTlbHitsForFetch),
9550 ENTRY(iem.s.StatNativeTlbHitsForStore),
9551 ENTRY(iem.s.StatNativeTlbHitsForStack),
9552 ENTRY(iem.s.StatNativeTlbHitsForMapped),
9553 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
9554 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
9555 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
9556 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
9557#endif
9558 ENTRY(iem.s.DataTlb.aEntries),
9559 ENTRY(iem.s.DataTlb.uTlbRevision),
9560 ENTRY(iem.s.DataTlb.uTlbPhysRev),
9561 ENTRY(iem.s.DataTlb.cTlbHits),
9562 ENTRY(iem.s.CodeTlb.aEntries),
9563 ENTRY(iem.s.CodeTlb.uTlbRevision),
9564 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
9565 ENTRY(iem.s.CodeTlb.cTlbHits),
9566 ENTRY(pVMR3),
9567 ENTRY(cpum.GstCtx.rax),
9568 ENTRY(cpum.GstCtx.ah),
9569 ENTRY(cpum.GstCtx.rcx),
9570 ENTRY(cpum.GstCtx.ch),
9571 ENTRY(cpum.GstCtx.rdx),
9572 ENTRY(cpum.GstCtx.dh),
9573 ENTRY(cpum.GstCtx.rbx),
9574 ENTRY(cpum.GstCtx.bh),
9575 ENTRY(cpum.GstCtx.rsp),
9576 ENTRY(cpum.GstCtx.rbp),
9577 ENTRY(cpum.GstCtx.rsi),
9578 ENTRY(cpum.GstCtx.rdi),
9579 ENTRY(cpum.GstCtx.r8),
9580 ENTRY(cpum.GstCtx.r9),
9581 ENTRY(cpum.GstCtx.r10),
9582 ENTRY(cpum.GstCtx.r11),
9583 ENTRY(cpum.GstCtx.r12),
9584 ENTRY(cpum.GstCtx.r13),
9585 ENTRY(cpum.GstCtx.r14),
9586 ENTRY(cpum.GstCtx.r15),
9587 ENTRY(cpum.GstCtx.es.Sel),
9588 ENTRY(cpum.GstCtx.es.u64Base),
9589 ENTRY(cpum.GstCtx.es.u32Limit),
9590 ENTRY(cpum.GstCtx.es.Attr),
9591 ENTRY(cpum.GstCtx.cs.Sel),
9592 ENTRY(cpum.GstCtx.cs.u64Base),
9593 ENTRY(cpum.GstCtx.cs.u32Limit),
9594 ENTRY(cpum.GstCtx.cs.Attr),
9595 ENTRY(cpum.GstCtx.ss.Sel),
9596 ENTRY(cpum.GstCtx.ss.u64Base),
9597 ENTRY(cpum.GstCtx.ss.u32Limit),
9598 ENTRY(cpum.GstCtx.ss.Attr),
9599 ENTRY(cpum.GstCtx.ds.Sel),
9600 ENTRY(cpum.GstCtx.ds.u64Base),
9601 ENTRY(cpum.GstCtx.ds.u32Limit),
9602 ENTRY(cpum.GstCtx.ds.Attr),
9603 ENTRY(cpum.GstCtx.fs.Sel),
9604 ENTRY(cpum.GstCtx.fs.u64Base),
9605 ENTRY(cpum.GstCtx.fs.u32Limit),
9606 ENTRY(cpum.GstCtx.fs.Attr),
9607 ENTRY(cpum.GstCtx.gs.Sel),
9608 ENTRY(cpum.GstCtx.gs.u64Base),
9609 ENTRY(cpum.GstCtx.gs.u32Limit),
9610 ENTRY(cpum.GstCtx.gs.Attr),
9611 ENTRY(cpum.GstCtx.rip),
9612 ENTRY(cpum.GstCtx.eflags),
9613 ENTRY(cpum.GstCtx.uRipInhibitInt),
9614 ENTRY(cpum.GstCtx.cr0),
9615 ENTRY(cpum.GstCtx.cr4),
9616 ENTRY(cpum.GstCtx.aXcr[0]),
9617 ENTRY(cpum.GstCtx.aXcr[1]),
9618#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9619 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
9620 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
9621 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
9622 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
9623 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
9624 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
9625 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
9626 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
9627 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
9628 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
9629 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
9630 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
9631 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
9632 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
9633 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
9634 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
9635 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
9636 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
9637 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
9638 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
9639 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
9640 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
9641 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
9642 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
9643 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
9644 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
9645 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
9646 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
9647 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
9648 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
9649 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
9650 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
9651#endif
9652#undef ENTRY
9653 };
9654#ifdef VBOX_STRICT
9655 static bool s_fOrderChecked = false;
9656 if (!s_fOrderChecked)
9657 {
9658 s_fOrderChecked = true;
9659 uint32_t offPrev = s_aMembers[0].off;
9660 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
9661 {
9662 Assert(s_aMembers[i].off > offPrev);
9663 offPrev = s_aMembers[i].off;
9664 }
9665 }
9666#endif
9667
9668 /*
9669 * Binary lookup.
9670 */
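    /* Note: the binary lookup below requires s_aMembers to be sorted by ascending
       offset; the strict-mode check above verifies that ordering once at runtime. */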
9671 unsigned iStart = 0;
9672 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9673 for (;;)
9674 {
9675 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9676 uint32_t const offCur = s_aMembers[iCur].off;
9677 if (off < offCur)
9678 {
9679 if (iCur != iStart)
9680 iEnd = iCur;
9681 else
9682 break;
9683 }
9684 else if (off > offCur)
9685 {
9686 if (iCur + 1 < iEnd)
9687 iStart = iCur + 1;
9688 else
9689 break;
9690 }
9691 else
9692 return s_aMembers[iCur].pszName;
9693 }
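    /* Not found in the sorted table.  The threaded-function statistics counters are
       handled by the range check below (when statistics are enabled) rather than by
       individual table entries. */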
9694#ifdef VBOX_WITH_STATISTICS
9695 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9696 return "iem.s.acThreadedFuncStats[iFn]";
9697#endif
9698 return NULL;
9699}
9700
9701
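/**
 * Disassembles a native translation block to the given output helper.
 *
 * With debug info available (IEMNATIVE_WITH_TB_DEBUG_INFO) the native instructions are
 * interleaved with the guest instructions, threaded-call notes, register shadowing
 * changes and labels they were generated from; without it, the guest opcode ranges and
 * the native code are simply dumped one after the other.
 *
 * @param   pTb     The native translation block to disassemble.
 * @param   pHlp    The DBGF info helper to print with.
 */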
9702DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9703{
9704 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9705#if defined(RT_ARCH_AMD64)
9706 static const char * const a_apszMarkers[] =
9707 {
9708 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9709 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9710 };
9711#endif
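    /* Markers emitted via iemNativeEmitMarker() show up as 7-byte NOPs on AMD64; their
       32-bit payload either packs the threaded function index in the high word and the
       call number plus a 'recompiled' flag in the low word (cf. the RT_MAKE_U32 in
       iemNativeRecompile below), or selects one of the a_apszMarkers strings above.
       The disassembly loops below decode and print these. */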
9712
9713 char szDisBuf[512];
9714 DISSTATE Dis;
9715 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9716 uint32_t const cNative = pTb->Native.cInstructions;
9717 uint32_t offNative = 0;
9718#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9719 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9720#endif
9721 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9722 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9723 : DISCPUMODE_64BIT;
9724#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9725 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9726#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9727 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9728#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9729# error "Port me"
9730#else
9731 csh hDisasm = ~(size_t)0;
9732# if defined(RT_ARCH_AMD64)
9733 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9734# elif defined(RT_ARCH_ARM64)
9735 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9736# else
9737# error "Port me"
9738# endif
9739 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9740
9741 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9742 //Assert(rcCs == CS_ERR_OK);
9743#endif
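    /* Summary of the above: the builtin DIS disassembler/formatter is used on AMD64 and
       ARM64 hosts unless VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER is defined, in which
       case a capstone handle is opened for the host architecture instead. */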
9744
9745 /*
9746 * Print TB info.
9747 */
9748 pHlp->pfnPrintf(pHlp,
9749 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
9750 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9751 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9752 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9753#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9754 if (pDbgInfo && pDbgInfo->cEntries > 1)
9755 {
9756 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9757
9758 /*
 9759             * This disassembly is driven by the debug info which follows the native
 9760             * code and indicates where the next guest instruction starts, where the
 9761             * labels are, and similar things.
9762 */
9763 uint32_t idxThreadedCall = 0;
9764 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9765 uint8_t idxRange = UINT8_MAX;
9766 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9767 uint32_t offRange = 0;
9768 uint32_t offOpcodes = 0;
9769 uint32_t const cbOpcodes = pTb->cbOpcodes;
9770 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9771 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9772 uint32_t iDbgEntry = 1;
9773 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9774
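        /* Walk the native instructions; each time the next debug info entry's native
           offset is reached, the inner loop below prints the pending guest instructions,
           threaded-call notes, register shadowing changes and labels before the native
           disassembly resumes. */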
9775 while (offNative < cNative)
9776 {
9777 /* If we're at or have passed the point where the next chunk of debug
9778 info starts, process it. */
9779 if (offDbgNativeNext <= offNative)
9780 {
9781 offDbgNativeNext = UINT32_MAX;
9782 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9783 {
9784 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9785 {
9786 case kIemTbDbgEntryType_GuestInstruction:
9787 {
9788 /* Did the exec flag change? */
9789 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9790 {
9791 pHlp->pfnPrintf(pHlp,
9792 " fExec change %#08x -> %#08x %s\n",
9793 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9794 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9795 szDisBuf, sizeof(szDisBuf)));
9796 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9797 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9798 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9799 : DISCPUMODE_64BIT;
9800 }
9801
 9802                            /* New opcode range? We need to cope with a spurious debug info entry here for cases
9803 where the compilation was aborted before the opcode was recorded and the actual
9804 instruction was translated to a threaded call. This may happen when we run out
9805 of ranges, or when some complicated interrupts/FFs are found to be pending or
9806 similar. So, we just deal with it here rather than in the compiler code as it
9807 is a lot simpler to do here. */
9808 if ( idxRange == UINT8_MAX
9809 || idxRange >= cRanges
9810 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9811 {
9812 idxRange += 1;
9813 if (idxRange < cRanges)
9814 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9815 else
9816 continue;
9817 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9818 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9819 + (pTb->aRanges[idxRange].idxPhysPage == 0
9820 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9821 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9822 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9823 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9824 pTb->aRanges[idxRange].idxPhysPage);
9825 GCPhysPc += offRange;
9826 }
9827
9828 /* Disassemble the instruction. */
9829 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9830 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9831 uint32_t cbInstr = 1;
9832 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9833 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9834 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9835 if (RT_SUCCESS(rc))
9836 {
9837 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9838 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9839 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9840 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9841
9842 static unsigned const s_offMarker = 55;
9843 static char const s_szMarker[] = " ; <--- guest";
9844 if (cch < s_offMarker)
9845 {
9846 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9847 cch = s_offMarker;
9848 }
9849 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9850 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9851
9852 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9853 }
9854 else
9855 {
9856 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9857 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9858 cbInstr = 1;
9859 }
9860 GCPhysPc += cbInstr;
9861 offOpcodes += cbInstr;
9862 offRange += cbInstr;
9863 continue;
9864 }
9865
9866 case kIemTbDbgEntryType_ThreadedCall:
9867 pHlp->pfnPrintf(pHlp,
9868 " Call #%u to %s (%u args) - %s\n",
9869 idxThreadedCall,
9870 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9871 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9872 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9873 idxThreadedCall++;
9874 continue;
9875
9876 case kIemTbDbgEntryType_GuestRegShadowing:
9877 {
9878 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9879 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9880 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9881 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9882 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9883 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9884 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9885 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9886 else
9887 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9888 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9889 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9890 continue;
9891 }
9892
9893#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9894 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9895 {
9896 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9897 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9898 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9899 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9900 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9901 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9902 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9903 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9904 else
9905 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9906 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9907 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9908 continue;
9909 }
9910#endif
9911
9912 case kIemTbDbgEntryType_Label:
9913 {
9914 const char *pszName = "what_the_fudge";
9915 const char *pszComment = "";
9916 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
9917 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
9918 {
9919 case kIemNativeLabelType_Return: pszName = "Return"; break;
9920 case kIemNativeLabelType_ReturnBreak: pszName = "ReturnBreak"; break;
9921 case kIemNativeLabelType_ReturnWithFlags: pszName = "ReturnWithFlags"; break;
9922 case kIemNativeLabelType_NonZeroRetOrPassUp: pszName = "NonZeroRetOrPassUp"; break;
9923 case kIemNativeLabelType_RaiseDe: pszName = "RaiseDe"; break;
9924 case kIemNativeLabelType_RaiseUd: pszName = "RaiseUd"; break;
9925 case kIemNativeLabelType_RaiseSseRelated: pszName = "RaiseSseRelated"; break;
9926 case kIemNativeLabelType_RaiseAvxRelated: pszName = "RaiseAvxRelated"; break;
9927 case kIemNativeLabelType_RaiseSseAvxFpRelated: pszName = "RaiseSseAvxFpRelated"; break;
9928 case kIemNativeLabelType_RaiseNm: pszName = "RaiseNm"; break;
9929 case kIemNativeLabelType_RaiseGp0: pszName = "RaiseGp0"; break;
9930 case kIemNativeLabelType_RaiseMf: pszName = "RaiseMf"; break;
9931 case kIemNativeLabelType_RaiseXf: pszName = "RaiseXf"; break;
9932 case kIemNativeLabelType_ObsoleteTb: pszName = "ObsoleteTb"; break;
9933 case kIemNativeLabelType_NeedCsLimChecking: pszName = "NeedCsLimChecking"; break;
9934 case kIemNativeLabelType_CheckBranchMiss: pszName = "CheckBranchMiss"; break;
9935 case kIemNativeLabelType_If:
9936 pszName = "If";
9937 fNumbered = true;
9938 break;
9939 case kIemNativeLabelType_Else:
9940 pszName = "Else";
9941 fNumbered = true;
9942 pszComment = " ; regs state restored pre-if-block";
9943 break;
9944 case kIemNativeLabelType_Endif:
9945 pszName = "Endif";
9946 fNumbered = true;
9947 break;
9948 case kIemNativeLabelType_CheckIrq:
9949 pszName = "CheckIrq_CheckVM";
9950 fNumbered = true;
9951 break;
9952 case kIemNativeLabelType_TlbLookup:
9953 pszName = "TlbLookup";
9954 fNumbered = true;
9955 break;
9956 case kIemNativeLabelType_TlbMiss:
9957 pszName = "TlbMiss";
9958 fNumbered = true;
9959 break;
9960 case kIemNativeLabelType_TlbDone:
9961 pszName = "TlbDone";
9962 fNumbered = true;
9963 break;
9964 case kIemNativeLabelType_Invalid:
9965 case kIemNativeLabelType_End:
9966 break;
9967 }
9968 if (fNumbered)
9969 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9970 else
9971 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9972 continue;
9973 }
9974
9975 case kIemTbDbgEntryType_NativeOffset:
9976 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9977 Assert(offDbgNativeNext >= offNative);
9978 break;
9979
9980#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9981 case kIemTbDbgEntryType_DelayedPcUpdate:
9982 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9983 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9984 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9985 continue;
9986#endif
9987
9988#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9989 case kIemTbDbgEntryType_GuestRegDirty:
9990 {
9991 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9992 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9993 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9994 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9995 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9996 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9997 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9998 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9999 pszGstReg, pszHstReg);
10000 continue;
10001 }
10002
10003 case kIemTbDbgEntryType_GuestRegWriteback:
10004 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",
10005 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
10006 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg);
10007 continue;
10008#endif
10009
10010 default:
10011 AssertFailed();
10012 }
10013 iDbgEntry++;
10014 break;
10015 }
10016 }
10017
10018 /*
10019 * Disassemble the next native instruction.
10020 */
10021 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10022# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10023 uint32_t cbInstr = sizeof(paNative[0]);
10024 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10025 if (RT_SUCCESS(rc))
10026 {
10027# if defined(RT_ARCH_AMD64)
10028 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10029 {
10030 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10031 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10032 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
10033 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10034 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10035 uInfo & 0x8000 ? "recompiled" : "todo");
10036 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
10037 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
10038 else
10039 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10040 }
10041 else
10042# endif
10043 {
10044 const char *pszAnnotation = NULL;
10045# ifdef RT_ARCH_AMD64
10046 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10047 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10048 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10049 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
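                    /* Annotate memory operands that are addressed off the fixed pVCpu
                       register with the name of the VMCPU/guest context member, using
                       iemNativeDbgVCpuOffsetToName() on the displacement. */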
10050 PCDISOPPARAM pMemOp;
10051 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
10052 pMemOp = &Dis.Param1;
10053 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
10054 pMemOp = &Dis.Param2;
10055 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
10056 pMemOp = &Dis.Param3;
10057 else
10058 pMemOp = NULL;
10059 if ( pMemOp
10060 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
10061 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
10062 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
10063 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
10064
10065#elif defined(RT_ARCH_ARM64)
10066 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10067 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10068 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10069# else
10070# error "Port me"
10071# endif
10072 if (pszAnnotation)
10073 {
10074 static unsigned const s_offAnnotation = 55;
10075 size_t const cchAnnotation = strlen(pszAnnotation);
10076 size_t cchDis = strlen(szDisBuf);
10077 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
10078 {
10079 if (cchDis < s_offAnnotation)
10080 {
10081 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
10082 cchDis = s_offAnnotation;
10083 }
10084 szDisBuf[cchDis++] = ' ';
10085 szDisBuf[cchDis++] = ';';
10086 szDisBuf[cchDis++] = ' ';
10087 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
10088 }
10089 }
10090 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10091 }
10092 }
10093 else
10094 {
10095# if defined(RT_ARCH_AMD64)
10096 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10097 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10098# elif defined(RT_ARCH_ARM64)
10099 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10100# else
10101# error "Port me"
10102# endif
10103 cbInstr = sizeof(paNative[0]);
10104 }
10105 offNative += cbInstr / sizeof(paNative[0]);
10106
10107# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10108 cs_insn *pInstr;
10109 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10110 (uintptr_t)pNativeCur, 1, &pInstr);
10111 if (cInstrs > 0)
10112 {
10113 Assert(cInstrs == 1);
10114 const char *pszAnnotation = NULL;
10115# if defined(RT_ARCH_ARM64)
10116 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
10117 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
10118 {
 10119                    /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
10120 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
10121 char *psz = strchr(pInstr->op_str, '[');
10122 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
10123 {
10124 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
10125 int32_t off = -1;
10126 psz += 4;
10127 if (*psz == ']')
10128 off = 0;
10129 else if (*psz == ',')
10130 {
10131 psz = RTStrStripL(psz + 1);
10132 if (*psz == '#')
10133 off = RTStrToInt32(&psz[1]);
10134 /** @todo deal with index registers and LSL as well... */
10135 }
10136 if (off >= 0)
10137 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
10138 }
10139 }
10140# endif
10141
10142 size_t const cchOp = strlen(pInstr->op_str);
10143# if defined(RT_ARCH_AMD64)
10144 if (pszAnnotation)
10145 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
10146 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
10147 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
10148 else
10149 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10150 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10151
10152# else
10153 if (pszAnnotation)
10154 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
10155 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
10156 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
10157 else
10158 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10159 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10160# endif
10161 offNative += pInstr->size / sizeof(*pNativeCur);
10162 cs_free(pInstr, cInstrs);
10163 }
10164 else
10165 {
10166# if defined(RT_ARCH_AMD64)
10167 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
 10168                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
10169# else
10170 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10171# endif
10172 offNative++;
10173 }
10174# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10175 }
10176 }
10177 else
10178#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
10179 {
10180 /*
10181 * No debug info, just disassemble the x86 code and then the native code.
10182 *
10183 * First the guest code:
10184 */
10185 for (unsigned i = 0; i < pTb->cRanges; i++)
10186 {
10187 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
10188 + (pTb->aRanges[i].idxPhysPage == 0
10189 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
10190 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
10191 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
10192 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
10193 unsigned off = pTb->aRanges[i].offOpcodes;
10194 /** @todo this ain't working when crossing pages! */
10195 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
10196 while (off < cbOpcodes)
10197 {
10198 uint32_t cbInstr = 1;
10199 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
10200 &pTb->pabOpcodes[off], cbOpcodes - off,
10201 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
10202 if (RT_SUCCESS(rc))
10203 {
10204 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10205 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10206 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10207 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10208 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
10209 GCPhysPc += cbInstr;
10210 off += cbInstr;
10211 }
10212 else
10213 {
10214 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
10215 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
10216 break;
10217 }
10218 }
10219 }
10220
10221 /*
10222 * Then the native code:
10223 */
10224 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
10225 while (offNative < cNative)
10226 {
10227 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
10228# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10229 uint32_t cbInstr = sizeof(paNative[0]);
10230 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
10231 if (RT_SUCCESS(rc))
10232 {
10233# if defined(RT_ARCH_AMD64)
10234 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
10235 {
10236 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
10237 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
10238 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
10239 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
10240 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
10241 uInfo & 0x8000 ? "recompiled" : "todo");
10242 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
10243 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
10244 else
10245 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
10246 }
10247 else
10248# endif
10249 {
10250# ifdef RT_ARCH_AMD64
10251 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
10252 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
10253 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10254 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10255# elif defined(RT_ARCH_ARM64)
10256 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
10257 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
10258 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
10259# else
10260# error "Port me"
10261# endif
10262 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
10263 }
10264 }
10265 else
10266 {
10267# if defined(RT_ARCH_AMD64)
10268 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
10269 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
10270# else
10271 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
10272# endif
10273 cbInstr = sizeof(paNative[0]);
10274 }
10275 offNative += cbInstr / sizeof(paNative[0]);
10276
10277# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10278 cs_insn *pInstr;
10279 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
10280 (uintptr_t)pNativeCur, 1, &pInstr);
10281 if (cInstrs > 0)
10282 {
10283 Assert(cInstrs == 1);
10284# if defined(RT_ARCH_AMD64)
10285 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
10286 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
10287# else
10288 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
10289 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
10290# endif
10291 offNative += pInstr->size / sizeof(*pNativeCur);
10292 cs_free(pInstr, cInstrs);
10293 }
10294 else
10295 {
10296# if defined(RT_ARCH_AMD64)
10297 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
 10298                                pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
10299# else
10300 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
10301# endif
10302 offNative++;
10303 }
10304# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
10305 }
10306 }
10307
10308#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
10309 /* Cleanup. */
10310 cs_close(&hDisasm);
10311#endif
10312}
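/* Typical use: dumping a freshly recompiled TB to the log; e.g. the Log3 path at the end
   of iemNativeRecompile() below does iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp()). */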
10313
10314
10315/**
10316 * Recompiles the given threaded TB into a native one.
10317 *
10318 * In case of failure the translation block will be returned as-is.
10319 *
10320 * @returns pTb.
10321 * @param pVCpu The cross context virtual CPU structure of the calling
10322 * thread.
10323 * @param pTb The threaded translation to recompile to native.
10324 */
10325DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10326{
10327 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10328
10329 /*
 10330     * The first time thru, we allocate the recompiler state; the other times
10331 * we just need to reset it before using it again.
10332 */
10333 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10334 if (RT_LIKELY(pReNative))
10335 iemNativeReInit(pReNative, pTb);
10336 else
10337 {
10338 pReNative = iemNativeInit(pVCpu, pTb);
10339 AssertReturn(pReNative, pTb);
10340 }
10341
10342#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10343 /*
10344 * First do liveness analysis. This is done backwards.
10345 */
10346 {
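        /* The entry for the final call is initialized as all-unused; walking backwards,
           each earlier entry is then derived from its successor by the per-function
           liveness handler, or by the generic exception/call initializer when no handler
           is registered for that threaded function. */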
10347 uint32_t idxCall = pTb->Thrd.cCalls;
10348 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10349 { /* likely */ }
10350 else
10351 {
10352 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10353 while (idxCall > cAlloc)
10354 cAlloc *= 2;
10355 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10356 AssertReturn(pvNew, pTb);
10357 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10358 pReNative->cLivenessEntriesAlloc = cAlloc;
10359 }
10360 AssertReturn(idxCall > 0, pTb);
10361 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10362
10363 /* The initial (final) entry. */
10364 idxCall--;
10365 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10366
10367 /* Loop backwards thru the calls and fill in the other entries. */
10368 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10369 while (idxCall > 0)
10370 {
10371 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10372 if (pfnLiveness)
10373 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10374 else
10375 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
10376 pCallEntry--;
10377 idxCall--;
10378 }
10379
10380# ifdef VBOX_WITH_STATISTICS
 10381       /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
 10382          to 'clobbered' rather than 'input'. */
10383 /** @todo */
10384# endif
10385 }
10386#endif
10387
10388 /*
10389 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10390 * for aborting if an error happens.
10391 */
10392 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10393#ifdef LOG_ENABLED
10394 uint32_t const cCallsOrg = cCallsLeft;
10395#endif
10396 uint32_t off = 0;
10397 int rc = VINF_SUCCESS;
10398 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10399 {
10400 /*
10401 * Emit prolog code (fixed).
10402 */
10403 off = iemNativeEmitProlog(pReNative, off);
10404
10405 /*
10406 * Convert the calls to native code.
10407 */
10408#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10409 int32_t iGstInstr = -1;
10410#endif
10411#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10412 uint32_t cThreadedCalls = 0;
10413 uint32_t cRecompiledCalls = 0;
10414#endif
10415#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10416 uint32_t idxCurCall = 0;
10417#endif
10418 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10419 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
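        /* Main translation loop: each threaded call entry is either recompiled to native
           code by its dedicated handler from g_apfnIemNativeRecompileFunctions, or, when
           no handler exists yet, emitted as a plain call to the threaded C function. */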
10420 while (cCallsLeft-- > 0)
10421 {
10422 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10423#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10424 pReNative->idxCurCall = idxCurCall;
10425#endif
10426
10427 /*
10428 * Debug info, assembly markup and statistics.
10429 */
10430#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10431 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10432 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10433#endif
10434#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10435 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10436 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10437 {
10438 if (iGstInstr < (int32_t)pTb->cInstructions)
10439 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10440 else
10441 Assert(iGstInstr == pTb->cInstructions);
10442 iGstInstr = pCallEntry->idxInstr;
10443 }
10444 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10445#endif
10446#if defined(VBOX_STRICT)
10447 off = iemNativeEmitMarker(pReNative, off,
10448 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10449#endif
10450#if defined(VBOX_STRICT)
10451 iemNativeRegAssertSanity(pReNative);
10452#endif
10453#ifdef VBOX_WITH_STATISTICS
10454 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10455#endif
10456
10457 /*
10458 * Actual work.
10459 */
10460 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
10461 pfnRecom ? "(recompiled)" : "(todo)"));
10462 if (pfnRecom) /** @todo stats on this. */
10463 {
10464 off = pfnRecom(pReNative, off, pCallEntry);
10465 STAM_REL_STATS({cRecompiledCalls++;});
10466 }
10467 else
10468 {
10469 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10470 STAM_REL_STATS({cThreadedCalls++;});
10471 }
10472 Assert(off <= pReNative->cInstrBufAlloc);
10473 Assert(pReNative->cCondDepth == 0);
10474
10475#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10476 if (LogIs2Enabled())
10477 {
10478 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10479# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10480 static const char s_achState[] = "CUXI";
10481# else
10482 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
10483# endif
10484
10485 char szGpr[17];
10486 for (unsigned i = 0; i < 16; i++)
10487 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10488 szGpr[16] = '\0';
10489
10490 char szSegBase[X86_SREG_COUNT + 1];
10491 char szSegLimit[X86_SREG_COUNT + 1];
10492 char szSegAttrib[X86_SREG_COUNT + 1];
10493 char szSegSel[X86_SREG_COUNT + 1];
10494 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10495 {
10496 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10497 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10498 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10499 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10500 }
10501 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10502 = szSegSel[X86_SREG_COUNT] = '\0';
10503
10504 char szEFlags[8];
10505 for (unsigned i = 0; i < 7; i++)
10506 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10507 szEFlags[7] = '\0';
10508
10509 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10510 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10511 }
10512#endif
10513
10514 /*
10515 * Advance.
10516 */
10517 pCallEntry++;
10518#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
10519 idxCurCall++;
10520#endif
10521 }
10522
10523 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10524 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10525 if (!cThreadedCalls)
10526 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10527
10528 /*
10529 * Emit the epilog code.
10530 */
10531 uint32_t idxReturnLabel;
10532 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10533
10534 /*
10535 * Generate special jump labels.
10536 */
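        /* Only the labels that were actually requested during recompilation (tracked in
           bmLabelTypes) get their shared tail code emitted. */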
10537 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10538 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10539 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10540 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10541
10542 /*
 10543        * Generate simple TB tail labels that just call a helper with a pVCpu
 10544        * arg and either return or longjmp/throw a non-zero status.
10545 *
10546 * The array entries must be ordered by enmLabel value so we can index
10547 * using fTailLabels bit numbers.
10548 */
10549 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10550 static struct
10551 {
10552 IEMNATIVELABELTYPE enmLabel;
10553 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10554 } const g_aSimpleTailLabels[] =
10555 {
10556 { kIemNativeLabelType_Invalid, NULL },
10557 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10558 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10559 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10560 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10561 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10562 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10563 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10564 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10565 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10566 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10567 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10568 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10569 };
10570 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10571 AssertCompile(kIemNativeLabelType_Invalid == 0);
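        /* Mask out bit 0 (Invalid) and everything above the last simple label;
           RT_BIT_64(LastSimple + 1) - 2 leaves exactly bits 1 thru LastSimple set. */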
10572 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
10573 if (fTailLabels)
10574 {
10575 do
10576 {
10577 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10578 fTailLabels &= ~RT_BIT_64(enmLabel);
10579 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10580
10581 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10582 Assert(idxLabel != UINT32_MAX);
10583 if (idxLabel != UINT32_MAX)
10584 {
10585 iemNativeLabelDefine(pReNative, idxLabel, off);
10586
10587 /* int pfnCallback(PVMCPUCC pVCpu) */
10588 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10589 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10590
10591 /* jump back to the return sequence. */
10592 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10593 }
10594
10595 } while (fTailLabels);
10596 }
10597 }
10598 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10599 {
10600 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10601 return pTb;
10602 }
10603 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10604 Assert(off <= pReNative->cInstrBufAlloc);
10605
10606 /*
 10607    * Make sure all labels have been defined.
10608 */
10609 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10610#ifdef VBOX_STRICT
10611 uint32_t const cLabels = pReNative->cLabels;
10612 for (uint32_t i = 0; i < cLabels; i++)
10613 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10614#endif
10615
10616 /*
10617 * Allocate executable memory, copy over the code we've generated.
10618 */
10619 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10620 if (pTbAllocator->pDelayedFreeHead)
10621 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10622
10623 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb);
10624 AssertReturn(paFinalInstrBuf, pTb);
10625 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10626
10627 /*
10628 * Apply fixups.
10629 */
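    /* Each fixup patches an already emitted instruction with the now final label offset:
       a 32-bit relative displacement on AMD64/X86, or a branch immediate field (26, 19 or
       14 bits wide at the bit position indicated by the fixup type) on ARM64. */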
10630 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10631 uint32_t const cFixups = pReNative->cFixups;
10632 for (uint32_t i = 0; i < cFixups; i++)
10633 {
10634 Assert(paFixups[i].off < off);
10635 Assert(paFixups[i].idxLabel < cLabels);
10636 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10637 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10638 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10639 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10640 switch (paFixups[i].enmType)
10641 {
10642#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10643 case kIemNativeFixupType_Rel32:
10644 Assert(paFixups[i].off + 4 <= off);
10645 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10646 continue;
10647
10648#elif defined(RT_ARCH_ARM64)
10649 case kIemNativeFixupType_RelImm26At0:
10650 {
10651 Assert(paFixups[i].off < off);
10652 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10653 Assert(offDisp >= -262144 && offDisp < 262144);
10654 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10655 continue;
10656 }
10657
10658 case kIemNativeFixupType_RelImm19At5:
10659 {
10660 Assert(paFixups[i].off < off);
10661 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10662 Assert(offDisp >= -262144 && offDisp < 262144);
10663 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10664 continue;
10665 }
10666
10667 case kIemNativeFixupType_RelImm14At5:
10668 {
10669 Assert(paFixups[i].off < off);
10670 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10671 Assert(offDisp >= -8192 && offDisp < 8192);
10672 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10673 continue;
10674 }
10675
10676#endif
10677 case kIemNativeFixupType_Invalid:
10678 case kIemNativeFixupType_End:
10679 break;
10680 }
10681 AssertFailed();
10682 }
10683
10684 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
10685 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10686
10687 /*
10688 * Convert the translation block.
10689 */
10690 RTMemFree(pTb->Thrd.paCalls);
10691 pTb->Native.paInstructions = paFinalInstrBuf;
10692 pTb->Native.cInstructions = off;
10693 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10694#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
 10695    pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10696 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10697#endif
10698
10699 Assert(pTbAllocator->cThreadedTbs > 0);
10700 pTbAllocator->cThreadedTbs -= 1;
10701 pTbAllocator->cNativeTbs += 1;
10702 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10703
10704#ifdef LOG_ENABLED
10705 /*
10706 * Disassemble to the log if enabled.
10707 */
10708 if (LogIs3Enabled())
10709 {
10710 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10711 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
10712# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10713 RTLogFlush(NULL);
10714# endif
10715 }
10716#endif
10717 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10718
10719 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10720 return pTb;
10721}
10722