VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103729

Last change on this file since 103729 was 103729, checked in by vboxsync, 13 months ago

VMM/IEM: Initial implementation of a SIMD register allocator and associated code in order to be able to recompile SSE/AVX instructions (disabled by default and only working on ARM64 right now), bugref:10614 [doxygen and scm]

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 730.6 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103729 2024-03-07 12:24:31Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
136static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
137 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
138# endif
139static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
140#endif
141#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
142static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
143static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
144#endif
145DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
146DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
147 IEMNATIVEGSTREG enmGstReg, uint32_t off);
148DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
149
150
151/*********************************************************************************************************************************
152* Executable Memory Allocator *
153*********************************************************************************************************************************/
154/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155 * Use an alternative chunk sub-allocator that does not store internal data
156 * in the chunk.
157 *
158 * Using RTHeapSimple is not practical on newer darwin systems where
159 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
160 * memory. We would have to change the protection of the whole chunk for
161 * every call to RTHeapSimple, which would be rather expensive.
162 *
163 * This alternative implementation lets us restrict page protection modifications
164 * to the pages backing the executable memory we just allocated.
165 */
166#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
167/** The chunk sub-allocation unit size in bytes. */
168#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
169/** The chunk sub-allocation unit size as a shift factor. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
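/* A minimal illustration, not part of the build and with a hypothetical name,
 * of how the two defines above relate: the unit size equals
 * RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT), and a request is rounded up
 * to whole 128 byte units before the allocation bitmap is consulted. */
#if 0
DECLINLINE(uint32_t) iemExecMemExampleReqToUnits(uint32_t cbReq)
{
    AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE == RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT));
    /* E.g. cbReq = 200 -> (200 + 127) >> 7 = 2 units, i.e. 256 bytes reserved. */
    return (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
}
#endif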
171
172#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
173# ifdef IEMNATIVE_USE_GDB_JIT
174# define IEMNATIVE_USE_GDB_JIT_ET_DYN
175
176/** GDB JIT: Code entry. */
177typedef struct GDBJITCODEENTRY
178{
179 struct GDBJITCODEENTRY *pNext;
180 struct GDBJITCODEENTRY *pPrev;
181 uint8_t *pbSymFile;
182 uint64_t cbSymFile;
183} GDBJITCODEENTRY;
184
185/** GDB JIT: Actions. */
186typedef enum GDBJITACTIONS : uint32_t
187{
188 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
189} GDBJITACTIONS;
190
191/** GDB JIT: Descriptor. */
192typedef struct GDBJITDESCRIPTOR
193{
194 uint32_t uVersion;
195 GDBJITACTIONS enmAction;
196 GDBJITCODEENTRY *pRelevant;
197 GDBJITCODEENTRY *pHead;
198 /** Our addition: */
199 GDBJITCODEENTRY *pTail;
200} GDBJITDESCRIPTOR;
201
202/** GDB JIT: Our simple symbol file data. */
203typedef struct GDBJITSYMFILE
204{
205 Elf64_Ehdr EHdr;
206# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Shdr aShdrs[5];
208# else
209 Elf64_Shdr aShdrs[7];
210 Elf64_Phdr aPhdrs[2];
211# endif
212 /** The dwarf ehframe data for the chunk. */
213 uint8_t abEhFrame[512];
214 char szzStrTab[128];
215 Elf64_Sym aSymbols[3];
216# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
217 Elf64_Sym aDynSyms[2];
218 Elf64_Dyn aDyn[6];
219# endif
220} GDBJITSYMFILE;
221
222extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
223extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
224
225/** Init once for g_IemNativeGdbJitLock. */
226static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
227/** Init once for the critical section. */
228static RTCRITSECT g_IemNativeGdbJitLock;
229
230/** GDB reads the info here. */
231GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
232
233/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
234DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
235{
236 ASMNopPause();
237}
238
239/** @callback_method_impl{FNRTONCE} */
240static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
241{
242 RT_NOREF(pvUser);
243 return RTCritSectInit(&g_IemNativeGdbJitLock);
244}
245
246
247# endif /* IEMNATIVE_USE_GDB_JIT */
248
249/**
250 * Per-chunk unwind info for non-windows hosts.
251 */
252typedef struct IEMEXECMEMCHUNKEHFRAME
253{
254# ifdef IEMNATIVE_USE_LIBUNWIND
255 /** The offset of the FDA into abEhFrame. */
256 uintptr_t offFda;
257# else
258 /** 'struct object' storage area. */
259 uint8_t abObject[1024];
260# endif
261# ifdef IEMNATIVE_USE_GDB_JIT
262# if 0
263 /** The GDB JIT 'symbol file' data. */
264 GDBJITSYMFILE GdbJitSymFile;
265# endif
266 /** The GDB JIT list entry. */
267 GDBJITCODEENTRY GdbJitEntry;
268# endif
269 /** The dwarf ehframe data for the chunk. */
270 uint8_t abEhFrame[512];
271} IEMEXECMEMCHUNKEHFRAME;
272/** Pointer to per-chunk unwind info for non-windows hosts. */
273typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
274#endif
275
276
277/**
278 * A chunk of executable memory.
279 */
280typedef struct IEMEXECMEMCHUNK
281{
282#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
283 /** Number of free items in this chunk. */
284 uint32_t cFreeUnits;
285 /** Hint where to start searching for free space in the allocation bitmap. */
286 uint32_t idxFreeHint;
287#else
288 /** The heap handle. */
289 RTHEAPSIMPLE hHeap;
290#endif
291 /** Pointer to the chunk. */
292 void *pvChunk;
293#ifdef IN_RING3
294 /**
295 * Pointer to the unwind information.
296 *
297 * This is used during C++ throw and longjmp (windows and probably most other
298 * platforms). Some debuggers (windbg) make use of it as well.
299 *
300 * Windows: This is allocated from hHeap on windows because (at least for
301 * AMD64) the UNWIND_INFO structure address in the
302 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
303 *
304 * Others: Allocated from the regular heap to avoid unnecessary executable data
305 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
306 void *pvUnwindInfo;
307#elif defined(IN_RING0)
308 /** Allocation handle. */
309 RTR0MEMOBJ hMemObj;
310#endif
311} IEMEXECMEMCHUNK;
312/** Pointer to a memory chunk. */
313typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
314
315
316/**
317 * Executable memory allocator for the native recompiler.
318 */
319typedef struct IEMEXECMEMALLOCATOR
320{
321 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
322 uint32_t uMagic;
323
324 /** The chunk size. */
325 uint32_t cbChunk;
326 /** The maximum number of chunks. */
327 uint32_t cMaxChunks;
328 /** The current number of chunks. */
329 uint32_t cChunks;
330 /** Hint where to start looking for available memory. */
331 uint32_t idxChunkHint;
332 /** Statistics: Current number of allocations. */
333 uint32_t cAllocations;
334
335 /** The total amount of memory available. */
336 uint64_t cbTotal;
337 /** Total amount of free memory. */
338 uint64_t cbFree;
339 /** Total amount of memory allocated. */
340 uint64_t cbAllocated;
341
342#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
343 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
344 *
345 * Since the chunk size is a power of two and the minimum chunk size is a lot
346 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
347 * require a whole number of uint64_t elements in the allocation bitmap. So,
348 * for the sake of simplicity/laziness, they are allocated as one continuous
349 * block. */
350 uint64_t *pbmAlloc;
351 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
352 uint32_t cUnitsPerChunk;
353 /** Number of bitmap elements per chunk (for quickly locating the bitmap
354 * portion corresponding to an chunk). */
355 uint32_t cBitmapElementsPerChunk;
356#else
357 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
358 * @{ */
359 /** The size of the heap internal block header. This is used to adjust the
360 * requested memory size to make sure there is exactly enough room for a header at
361 * the end of the blocks we allocate before the next 64 byte alignment line. */
362 uint32_t cbHeapBlockHdr;
363 /** The size of the initial heap allocation required to make sure the first
364 * allocation is correctly aligned. */
365 uint32_t cbHeapAlignTweak;
366 /** The alignment tweak allocation address. */
367 void *pvAlignTweak;
368 /** @} */
369#endif
370
371#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
372 /** Pointer to the array of unwind info running parallel to aChunks (same
373 * allocation as this structure, located after the bitmaps).
374 * (For Windows, the structures must reside in 32-bit RVA distance to the
375 * actual chunk, so they are allocated off the chunk.) */
376 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
377#endif
378
379 /** The allocation chunks. */
380 RT_FLEXIBLE_ARRAY_EXTENSION
381 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
382} IEMEXECMEMALLOCATOR;
383/** Pointer to an executable memory allocator. */
384typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
385
386/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
387#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
388
389
390static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
391
392
393/**
394 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
395 * the heap statistics.
396 */
397static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
398 uint32_t cbReq, uint32_t idxChunk)
399{
400 pExecMemAllocator->cAllocations += 1;
401 pExecMemAllocator->cbAllocated += cbReq;
402#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
403 pExecMemAllocator->cbFree -= cbReq;
404#else
405 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
406#endif
407 pExecMemAllocator->idxChunkHint = idxChunk;
408
409#ifdef RT_OS_DARWIN
410 /*
411 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
412 * on darwin. So, we mark the pages returned as read+write after alloc and
413 * expect the caller to call iemExecMemAllocatorReadyForUse when done
414 * writing to the allocation.
415 *
416 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
417 * for details.
418 */
419 /** @todo detect if this is necessary... it wasn't required on 10.15 or
420 * whatever older version it was. */
421 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
422 AssertRC(rc);
423#endif
424
425 return pvRet;
426}
427
428
429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
430static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
431 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
432{
433 /*
434 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
435 */
436 Assert(!(cToScan & 63));
437 Assert(!(idxFirst & 63));
438 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
439 pbmAlloc += idxFirst / 64;
440
441 /*
442 * Scan the bitmap for cReqUnits of consecutive clear bits
443 */
444 /** @todo This can probably be done more efficiently for non-x86 systems. */
445 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
446 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
447 {
448 uint32_t idxAddBit = 1;
449 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
450 idxAddBit++;
451 if (idxAddBit >= cReqUnits)
452 {
453 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
454
455 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
456 pChunk->cFreeUnits -= cReqUnits;
457 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
458
459 void * const pvRet = (uint8_t *)pChunk->pvChunk
460 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
461
462 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
463 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
464 }
465
466 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
467 }
468 return NULL;
469}
470#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
471
472
473static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
474{
475#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
476 /*
477 * Figure out how much to allocate.
478 */
479 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
480 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
481 {
482 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
483 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
484 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
485 {
486 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
487 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
488 if (pvRet)
489 return pvRet;
490 }
491 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
492 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
493 cReqUnits, idxChunk);
494 }
495#else
496 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
497 if (pvRet)
498 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
499#endif
500 return NULL;
501
502}
503
504
505/**
506 * Allocates @a cbReq bytes of executable memory.
507 *
508 * @returns Pointer to the memory, NULL if out of memory or other problem
509 * encountered.
510 * @param pVCpu The cross context virtual CPU structure of the calling
511 * thread.
512 * @param cbReq How many bytes are required.
513 */
514static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
515{
516 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
517 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
518 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
519
520
521 for (unsigned iIteration = 0;; iIteration++)
522 {
523 /*
524 * Adjust the request size so it'll fit the allocator alignment/whatnot.
525 *
526 * For the RTHeapSimple allocator this means to follow the logic described
527 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
528 * existing chunks if we think we've got sufficient free memory around.
529 *
530 * While for the alternative one we just align it up to a whole unit size.
531 */
532#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
533 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
534#else
535 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
536#endif
537 if (cbReq <= pExecMemAllocator->cbFree)
538 {
539 uint32_t const cChunks = pExecMemAllocator->cChunks;
540 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
541 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
548 {
549 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
550 if (pvRet)
551 return pvRet;
552 }
553 }
554
555 /*
556 * Can we grow it with another chunk?
557 */
558 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
559 {
560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
561 AssertLogRelRCReturn(rc, NULL);
562
563 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
564 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
565 if (pvRet)
566 return pvRet;
567 AssertFailed();
568 }
569
570 /*
571 * Try prune native TBs once.
572 */
573 if (iIteration == 0)
574 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
575 else
576 {
577 /** @todo stats... */
578 return NULL;
579 }
580 }
581
582}
583
584
585/** This is a hook that we may need later for changing memory protection back
586 * to readonly+exec */
587static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
588{
589#ifdef RT_OS_DARWIN
590 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
591 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
592 AssertRC(rc); RT_NOREF(pVCpu);
593
594 /*
595 * Flush the instruction cache:
596 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
597 */
598 /* sys_dcache_flush(pv, cb); - not necessary */
599 sys_icache_invalidate(pv, cb);
600#else
601 RT_NOREF(pVCpu, pv, cb);
602#endif
603}
604
605
606/**
607 * Frees executable memory.
608 */
609void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
610{
611 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
612 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
613 Assert(pv);
614#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
615 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
616#else
617 Assert(!((uintptr_t)pv & 63));
618#endif
619
620 /* Align the size as we did when allocating the block. */
621#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
622 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
623#else
624 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
625#endif
626
627 /* Free it / assert sanity. */
628#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
629 uint32_t const cChunks = pExecMemAllocator->cChunks;
630 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
631 bool fFound = false;
632 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
633 {
634 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
635 fFound = offChunk < cbChunk;
636 if (fFound)
637 {
638#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
639 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
640 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
641
642 /* Check that it's valid and free it. */
643 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
644 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
645 for (uint32_t i = 1; i < cReqUnits; i++)
646 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
647 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
648
649 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
650 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
651
652 /* Update the stats. */
653 pExecMemAllocator->cbAllocated -= cb;
654 pExecMemAllocator->cbFree += cb;
655 pExecMemAllocator->cAllocations -= 1;
656 return;
657#else
658 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
659 break;
660#endif
661 }
662 }
663# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
664 AssertFailed();
665# else
666 Assert(fFound);
667# endif
668#endif
669
670#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
671 /* Update stats while cb is freshly calculated. */
672 pExecMemAllocator->cbAllocated -= cb;
673 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
674 pExecMemAllocator->cAllocations -= 1;
675
676 /* Free it. */
677 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
678#endif
679}
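
/* Hypothetical caller-side sketch (not built, name invented) of the life cycle
 * implied by the functions above, in particular the darwin W^X dance: the block
 * comes back writable, the caller emits code into it, flips it to read+exec via
 * iemExecMemAllocatorReadyForUse and eventually returns it with
 * iemExecMemAllocatorFree.  The real call sites are elsewhere in the recompiler. */
#if 0
static void iemExecMemExampleLifeCycle(PVMCPUCC pVCpu, const uint8_t *pbCode, size_t cbCode)
{
    /* Note: the allocator asserts 32 < cbReq < 512 KB. */
    void *pv = iemExecMemAllocatorAlloc(pVCpu, (uint32_t)cbCode);
    if (pv)
    {
        memcpy(pv, pbCode, cbCode);                         /* still writable here (see AllocTailCode) */
        iemExecMemAllocatorReadyForUse(pVCpu, pv, cbCode);  /* flip to RX and flush the icache (darwin) */
        /* ... execute the recompiled code ... */
        iemExecMemAllocatorFree(pVCpu, pv, cbCode);
    }
}
#endif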
680
681
682
683#ifdef IN_RING3
684# ifdef RT_OS_WINDOWS
685
686/**
687 * Initializes the unwind info structures for windows hosts.
688 */
689static int
690iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
691 void *pvChunk, uint32_t idxChunk)
692{
693 RT_NOREF(pVCpu);
694
695 /*
696 * The AMD64 unwind opcodes.
697 *
698 * This is a program that starts with RSP after a RET instruction that
699 * ends up in recompiled code, and the operations we describe here will
700 * restore all non-volatile registers and bring RSP back to where our
701 * RET address is. This means it's reverse order from what happens in
702 * the prologue.
703 *
704 * Note! Using a frame register approach here both because we have one
705 * and mainly because the UWOP_ALLOC_LARGE argument values
706 * would be a pain to write initializers for. On the positive
707 * side, we're impervious to changes in the stack variable
708 * area and can deal with dynamic stack allocations if necessary.
709 */
710 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
711 {
712 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 16 (0x60) */
713 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
714 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
715 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
716 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
717 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
718 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
719 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
720 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
721 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
722 };
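 /* For orientation only: read bottom to top, the table above is the inverse of
  * a prologue of roughly this (assumed) shape; the actual prologue emitter
  * lives elsewhere in the recompiler:
  *      push rbp
  *      push rbx
  *      push rsi
  *      push rdi
  *      push r12 .. push r15
  *      sub  rsp, 8
  *      lea  rbp, [rsp + FrameOffset * 16]
  */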
723 union
724 {
725 IMAGE_UNWIND_INFO Info;
726 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
727 } s_UnwindInfo =
728 {
729 {
730 /* .Version = */ 1,
731 /* .Flags = */ 0,
732 /* .SizeOfProlog = */ 16, /* whatever */
733 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
734 /* .FrameRegister = */ X86_GREG_xBP,
735 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
736 }
737 };
738 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
739 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
740
741 /*
742 * Calc how much space we need and allocate it off the exec heap.
743 */
744 unsigned const cFunctionEntries = 1;
745 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
746 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
747# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
749 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
750 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
751# else
752 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
753 - pExecMemAllocator->cbHeapBlockHdr;
754 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
755 32 /*cbAlignment*/);
756# endif
757 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
758 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
759
760 /*
761 * Initialize the structures.
762 */
763 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
764
765 paFunctions[0].BeginAddress = 0;
766 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
767 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
768
769 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
770 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
771
772 /*
773 * Register it.
774 */
775 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
776 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
777
778 return VINF_SUCCESS;
779}
780
781
782# else /* !RT_OS_WINDOWS */
783
784/**
785 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
786 */
787DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
788{
789 if (iValue >= 64)
790 {
791 Assert(iValue < 0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
794 }
795 else if (iValue >= 0)
796 *Ptr.pb++ = (uint8_t)iValue;
797 else if (iValue > -64)
798 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
799 else
800 {
801 Assert(iValue > -0x2000);
802 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
803 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
804 }
805 return Ptr;
806}
807
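/* Illustrative only (not built, name invented): a couple of sample encodings
 * produced by the helper above, matching standard signed LEB128 for the narrow
 * range it handles. */
#if 0
static void iemDwarfLeb128Example(void)
{
    uint8_t    abBuf[8];
    RTPTRUNION Ptr = { abBuf };
    Ptr = iemDwarfPutLeb128(Ptr, -8);  /* single byte: 0x78 (the data alignment factor used below) */
    Ptr = iemDwarfPutLeb128(Ptr, 300); /* two bytes:   0xac 0x02 (the iValue >= 64 path)           */
    Assert(abBuf[0] == 0x78 && abBuf[1] == 0xac && abBuf[2] == 0x02);
    Assert(Ptr.pb == &abBuf[3]);
}
#endif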
808
809/**
810 * Emits an ULEB128 encoded value (up to 64-bit wide).
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
813{
814 while (uValue >= 0x80)
815 {
816 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
817 uValue >>= 7;
818 }
819 *Ptr.pb++ = (uint8_t)uValue;
820 return Ptr;
821}
822
823
824/**
825 * Emits a CFA rule as register @a uReg + offset @a off.
826 */
827DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
828{
829 *Ptr.pb++ = DW_CFA_def_cfa;
830 Ptr = iemDwarfPutUleb128(Ptr, uReg);
831 Ptr = iemDwarfPutUleb128(Ptr, off);
832 return Ptr;
833}
834
835
836/**
837 * Emits a register (@a uReg) save location:
838 * CFA + @a off * data_alignment_factor
839 */
840DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
841{
842 if (uReg < 0x40)
843 *Ptr.pb++ = DW_CFA_offset | uReg;
844 else
845 {
846 *Ptr.pb++ = DW_CFA_offset_extended;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 }
849 Ptr = iemDwarfPutUleb128(Ptr, off);
850 return Ptr;
851}
852
853
854# if 0 /* unused */
855/**
856 * Emits a register (@a uReg) save location, using signed offset:
857 * CFA + @a offSigned * data_alignment_factor
858 */
859DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
860{
861 *Ptr.pb++ = DW_CFA_offset_extended_sf;
862 Ptr = iemDwarfPutUleb128(Ptr, uReg);
863 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
864 return Ptr;
865}
866# endif
867
868
869/**
870 * Initializes the unwind info section for non-windows hosts.
871 */
872static int
873iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
874 void *pvChunk, uint32_t idxChunk)
875{
876 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
877 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
878
879 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
880
881 /*
882 * Generate the CIE first.
883 */
884# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
885 uint8_t const iDwarfVer = 3;
886# else
887 uint8_t const iDwarfVer = 4;
888# endif
889 RTPTRUNION const PtrCie = Ptr;
890 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
891 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
892 *Ptr.pb++ = iDwarfVer; /* DWARF version */
893 *Ptr.pb++ = 0; /* Augmentation. */
894 if (iDwarfVer >= 4)
895 {
896 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
897 *Ptr.pb++ = 0; /* Segment selector size. */
898 }
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
901# else
902 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
903# endif
904 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
905# ifdef RT_ARCH_AMD64
906 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
907# elif defined(RT_ARCH_ARM64)
908 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
909# else
910# error "port me"
911# endif
912 /* Initial instructions: */
913# ifdef RT_ARCH_AMD64
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
922# elif defined(RT_ARCH_ARM64)
923# if 1
924 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
925# else
926 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
927# endif
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
936 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
937 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
938 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
939 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
940 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
941 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
942# else
943# error "port me"
944# endif
945 while ((Ptr.u - PtrCie.u) & 3)
946 *Ptr.pb++ = DW_CFA_nop;
947 /* Finalize the CIE size. */
948 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
949
950 /*
951 * Generate an FDE for the whole chunk area.
952 */
953# ifdef IEMNATIVE_USE_LIBUNWIND
954 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
955# endif
956 RTPTRUNION const PtrFde = Ptr;
957 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
958 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
959 Ptr.pu32++;
960 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
961 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
962# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
963 *Ptr.pb++ = DW_CFA_nop;
964# endif
965 while ((Ptr.u - PtrFde.u) & 3)
966 *Ptr.pb++ = DW_CFA_nop;
967 /* Finalize the FDE size. */
968 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
969
970 /* Terminator entry. */
971 *Ptr.pu32++ = 0;
972 *Ptr.pu32++ = 0; /* just to be sure... */
973 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
974
975 /*
976 * Register it.
977 */
978# ifdef IEMNATIVE_USE_LIBUNWIND
979 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
980# else
981 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
982 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
983# endif
984
985# ifdef IEMNATIVE_USE_GDB_JIT
986 /*
987 * Now for telling GDB about this (experimental).
988 *
989 * This seems to work best with ET_DYN.
990 */
991 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
992# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
993 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
995# else
996 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
997 - pExecMemAllocator->cbHeapBlockHdr;
998 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
999# endif
1000 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1001 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1002
1003 RT_ZERO(*pSymFile);
1004
1005 /*
1006 * The ELF header:
1007 */
1008 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1009 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1010 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1011 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1012 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1013 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1014 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1015 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1016# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1017 pSymFile->EHdr.e_type = ET_DYN;
1018# else
1019 pSymFile->EHdr.e_type = ET_REL;
1020# endif
1021# ifdef RT_ARCH_AMD64
1022 pSymFile->EHdr.e_machine = EM_AMD64;
1023# elif defined(RT_ARCH_ARM64)
1024 pSymFile->EHdr.e_machine = EM_AARCH64;
1025# else
1026# error "port me"
1027# endif
1028 pSymFile->EHdr.e_version = 1; /*?*/
1029 pSymFile->EHdr.e_entry = 0;
1030# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1031 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1032# else
1033 pSymFile->EHdr.e_phoff = 0;
1034# endif
1035 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1036 pSymFile->EHdr.e_flags = 0;
1037 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1038# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1039 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1040 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1041# else
1042 pSymFile->EHdr.e_phentsize = 0;
1043 pSymFile->EHdr.e_phnum = 0;
1044# endif
1045 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1046 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1047 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1048
1049 uint32_t offStrTab = 0;
1050#define APPEND_STR(a_szStr) do { \
1051 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1052 offStrTab += sizeof(a_szStr); \
1053 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1054 } while (0)
1055#define APPEND_STR_FMT(a_szStr, ...) do { \
1056 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1057 offStrTab++; \
1058 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1059 } while (0)
1060
1061 /*
1062 * Section headers.
1063 */
1064 /* Section header #0: NULL */
1065 unsigned i = 0;
1066 APPEND_STR("");
1067 RT_ZERO(pSymFile->aShdrs[i]);
1068 i++;
1069
1070 /* Section header: .eh_frame */
1071 pSymFile->aShdrs[i].sh_name = offStrTab;
1072 APPEND_STR(".eh_frame");
1073 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1074 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1075# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1076 pSymFile->aShdrs[i].sh_offset
1077 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1078# else
1079 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1080 pSymFile->aShdrs[i].sh_offset = 0;
1081# endif
1082
1083 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1084 pSymFile->aShdrs[i].sh_link = 0;
1085 pSymFile->aShdrs[i].sh_info = 0;
1086 pSymFile->aShdrs[i].sh_addralign = 1;
1087 pSymFile->aShdrs[i].sh_entsize = 0;
1088 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1089 i++;
1090
1091 /* Section header: .shstrtab */
1092 unsigned const iShStrTab = i;
1093 pSymFile->EHdr.e_shstrndx = iShStrTab;
1094 pSymFile->aShdrs[i].sh_name = offStrTab;
1095 APPEND_STR(".shstrtab");
1096 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1097 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1098# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1099 pSymFile->aShdrs[i].sh_offset
1100 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1101# else
1102 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1103 pSymFile->aShdrs[i].sh_offset = 0;
1104# endif
1105 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1106 pSymFile->aShdrs[i].sh_link = 0;
1107 pSymFile->aShdrs[i].sh_info = 0;
1108 pSymFile->aShdrs[i].sh_addralign = 1;
1109 pSymFile->aShdrs[i].sh_entsize = 0;
1110 i++;
1111
1112 /* Section header: .symbols */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".symtab");
1115 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1124 i++;
1125
1126# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1127 /* Section header: .dynsym */
1128 pSymFile->aShdrs[i].sh_name = offStrTab;
1129 APPEND_STR(".dynsym");
1130 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1131 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1132 pSymFile->aShdrs[i].sh_offset
1133 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1134 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1135 pSymFile->aShdrs[i].sh_link = iShStrTab;
1136 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1137 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1138 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1139 i++;
1140# endif
1141
1142# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1143 /* Section header: .dynamic */
1144 pSymFile->aShdrs[i].sh_name = offStrTab;
1145 APPEND_STR(".dynamic");
1146 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1147 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1148 pSymFile->aShdrs[i].sh_offset
1149 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1150 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1151 pSymFile->aShdrs[i].sh_link = iShStrTab;
1152 pSymFile->aShdrs[i].sh_info = 0;
1153 pSymFile->aShdrs[i].sh_addralign = 1;
1154 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1155 i++;
1156# endif
1157
1158 /* Section header: .text */
1159 unsigned const iShText = i;
1160 pSymFile->aShdrs[i].sh_name = offStrTab;
1161 APPEND_STR(".text");
1162 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1163 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1164# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1165 pSymFile->aShdrs[i].sh_offset
1166 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1167# else
1168 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1169 pSymFile->aShdrs[i].sh_offset = 0;
1170# endif
1171 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1172 pSymFile->aShdrs[i].sh_link = 0;
1173 pSymFile->aShdrs[i].sh_info = 0;
1174 pSymFile->aShdrs[i].sh_addralign = 1;
1175 pSymFile->aShdrs[i].sh_entsize = 0;
1176 i++;
1177
1178 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1179
1180# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1181 /*
1182 * The program headers:
1183 */
1184 /* Everything in a single LOAD segment: */
1185 i = 0;
1186 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1187 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = 0;
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1193 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1194 i++;
1195 /* The .dynamic segment. */
1196 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1197 pSymFile->aPhdrs[i].p_flags = PF_R;
1198 pSymFile->aPhdrs[i].p_offset
1199 = pSymFile->aPhdrs[i].p_vaddr
1200 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1201 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1202 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1203 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1204 i++;
1205
1206 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1207
1208 /*
1209 * The dynamic section:
1210 */
1211 i = 0;
1212 pSymFile->aDyn[i].d_tag = DT_SONAME;
1213 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1214 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1223 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1224 i++;
1225 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1226 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1227 i++;
1228 pSymFile->aDyn[i].d_tag = DT_NULL;
1229 i++;
1230 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1231# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1232
1233 /*
1234 * Symbol tables:
1235 */
1236 /** @todo gdb doesn't seem to really like this ... */
1237 i = 0;
1238 pSymFile->aSymbols[i].st_name = 0;
1239 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1240 pSymFile->aSymbols[i].st_value = 0;
1241 pSymFile->aSymbols[i].st_size = 0;
1242 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1243 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1244# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1245 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1246# endif
1247 i++;
1248
1249 pSymFile->aSymbols[i].st_name = 0;
1250 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1251 pSymFile->aSymbols[i].st_value = 0;
1252 pSymFile->aSymbols[i].st_size = 0;
1253 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1254 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1255 i++;
1256
1257 pSymFile->aSymbols[i].st_name = offStrTab;
1258 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1259# if 0
1260 pSymFile->aSymbols[i].st_shndx = iShText;
1261 pSymFile->aSymbols[i].st_value = 0;
1262# else
1263 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1264 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1265# endif
1266 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1267 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1268 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1269# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1270 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1271 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1272# endif
1273 i++;
1274
1275 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1276 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1277
1278 /*
1279 * The GDB JIT entry and informing GDB.
1280 */
1281 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1282# if 1
1283 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1284# else
1285 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1286# endif
1287
1288 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1289 RTCritSectEnter(&g_IemNativeGdbJitLock);
1290 pEhFrame->GdbJitEntry.pNext = NULL;
1291 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1292 if (__jit_debug_descriptor.pTail)
1293 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1294 else
1295 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1296 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1297 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1298
1299 /* Notify GDB: */
1300 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1301 __jit_debug_register_code();
1302 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1303 RTCritSectLeave(&g_IemNativeGdbJitLock);
1304
1305# else /* !IEMNATIVE_USE_GDB_JIT */
1306 RT_NOREF(pVCpu);
1307# endif /* !IEMNATIVE_USE_GDB_JIT */
1308
1309 return VINF_SUCCESS;
1310}
1311
1312# endif /* !RT_OS_WINDOWS */
1313#endif /* IN_RING3 */
1314
1315
1316/**
1317 * Adds another chunk to the executable memory allocator.
1318 *
1319 * This is used by the init code for the initial allocation and later by the
1320 * regular allocator function when it's out of memory.
1321 */
1322static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1323{
1324 /* Check that we've room for growth. */
1325 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1326 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1327
1328 /* Allocate a chunk. */
1329#ifdef RT_OS_DARWIN
1330 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1331#else
1332 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1333#endif
1334 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1335
1336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1337 int rc = VINF_SUCCESS;
1338#else
1339 /* Initialize the heap for the chunk. */
1340 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1341 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1342 AssertRC(rc);
1343 if (RT_SUCCESS(rc))
1344 {
1345 /*
1346 * We want the memory to be aligned on 64 byte, so the first time thru
1347 * here we do some exploratory allocations to see how we can achieve this.
1348 * On subsequent runs we only make an initial adjustment allocation, if
1349 * necessary.
1350 *
1351 * Since we own the heap implementation, we know that the internal block
1352 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1353 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1354 * to the size, align up by 64 bytes, and subtract 32 bytes.
1355 *
1356 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1357 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1358 * allocation to force subsequent allocations to return 64 byte aligned
1359 * user areas.
1360 */
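 /* Worked example (illustrative, numbers taken from the comment above): a 200
  * byte request becomes RT_ALIGN_32(200 + 32, 64) - 32 = 256 - 32 = 224 bytes,
  * so the user area plus the next block's 32 byte header ends exactly on a
  * 64 byte line and the following user area stays 64 byte aligned. */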
1361 if (!pExecMemAllocator->cbHeapBlockHdr)
1362 {
1363 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1364 pExecMemAllocator->cbHeapAlignTweak = 64;
1365 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1366 32 /*cbAlignment*/);
1367 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1368
1369 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1376 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1377 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1378 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1379 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1380
1381 RTHeapSimpleFree(hHeap, pvTest2);
1382 RTHeapSimpleFree(hHeap, pvTest1);
1383 }
1384 else
1385 {
1386 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1387 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1388 }
1389 if (RT_SUCCESS(rc))
1390#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1391 {
1392 /*
1393 * Add the chunk.
1394 *
1395 * This must be done before the unwind init so windows can allocate
1396 * memory from the chunk when using the alternative sub-allocator.
1397 */
1398 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1399#ifdef IN_RING3
1400 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1401#endif
1402#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1403 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1404#else
1405 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1406 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1407 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1408 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1409#endif
1410
1411 pExecMemAllocator->cChunks = idxChunk + 1;
1412 pExecMemAllocator->idxChunkHint = idxChunk;
1413
1414#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1415 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1416 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1417#else
1418 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1419 pExecMemAllocator->cbTotal += cbFree;
1420 pExecMemAllocator->cbFree += cbFree;
1421#endif
1422
1423#ifdef IN_RING3
1424 /*
1425 * Initialize the unwind information (this cannot really fail atm).
1426 * (This sets pvUnwindInfo.)
1427 */
1428 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1429 if (RT_SUCCESS(rc))
1430#endif
1431 {
1432 return VINF_SUCCESS;
1433 }
1434
1435#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1436 /* Just in case the impossible happens, undo the above: */
1437 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1438 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1439 pExecMemAllocator->cChunks = idxChunk;
1440 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1441 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1442 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1443 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1444#endif
1445 }
1446#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1447 }
1448#endif
1449 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1450 RT_NOREF(pVCpu);
1451 return rc;
1452}
1453
1454
1455/**
1456 * Initializes the executable memory allocator for native recompilation on the
1457 * calling EMT.
1458 *
1459 * @returns VBox status code.
1460 * @param pVCpu The cross context virtual CPU structure of the calling
1461 * thread.
1462 * @param cbMax The max size of the allocator.
1463 * @param cbInitial The initial allocator size.
1464 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1465 * dependent).
1466 */
1467int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1468{
1469 /*
1470 * Validate input.
1471 */
1472 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1473 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1474 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1475 || cbChunk == 0
1476 || ( RT_IS_POWER_OF_TWO(cbChunk)
1477 && cbChunk >= _1M
1478 && cbChunk <= _256M
1479 && cbChunk <= cbMax),
1480 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1481 VERR_OUT_OF_RANGE);
1482
1483 /*
1484 * Adjust/figure out the chunk size.
1485 */
1486 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1487 {
1488 if (cbMax >= _256M)
1489 cbChunk = _64M;
1490 else
1491 {
1492 if (cbMax < _16M)
1493 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1494 else
1495 cbChunk = (uint32_t)cbMax / 4;
1496 if (!RT_IS_POWER_OF_TWO(cbChunk))
1497 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
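    /* ASMBitLastSetU32 returns the 1-based index of the most significant set bit,
       so this rounds a non-power-of-two cbChunk up to the next power of two. */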
1498 }
1499 }
1500
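    /* Round cbMax up to a whole number of chunks. Example: cbMax=100M gives cbChunk=32M
       (100M/4 = 25M, rounded up to a power of two), so cbMax becomes 128M and cMaxChunks is 4. */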
1501 if (cbChunk > cbMax)
1502 cbMax = cbChunk;
1503 else
1504 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1505 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1506 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1507
1508 /*
1509 * Allocate and initialize the allocator instance.
1510 */
1511 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1512#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1513 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
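    /* One allocation bitmap per chunk: one bit per allocation unit and eight bits per byte,
       hence the extra '+ 3' in the shift below. */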
1514 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1515 cbNeeded += cbBitmap * cMaxChunks;
1516 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1517 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1518#endif
1519#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1520 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1521 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1522#endif
1523 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1524 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1525 VERR_NO_MEMORY);
1526 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1527 pExecMemAllocator->cbChunk = cbChunk;
1528 pExecMemAllocator->cMaxChunks = cMaxChunks;
1529 pExecMemAllocator->cChunks = 0;
1530 pExecMemAllocator->idxChunkHint = 0;
1531 pExecMemAllocator->cAllocations = 0;
1532 pExecMemAllocator->cbTotal = 0;
1533 pExecMemAllocator->cbFree = 0;
1534 pExecMemAllocator->cbAllocated = 0;
1535#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1536 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1537 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1538 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1539 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1540#endif
1541#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1542 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1543#endif
1544 for (uint32_t i = 0; i < cMaxChunks; i++)
1545 {
1546#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1547 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1548 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1549#else
1550 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1551#endif
1552 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1553#ifdef IN_RING0
1554 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1555#else
1556 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1557#endif
1558 }
1559 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1560
1561 /*
1562 * Do the initial allocations.
1563 */
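    /* Grow one chunk at a time until the allocator covers at least cbInitial bytes. */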
1564 while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1565 {
1566 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1567 AssertLogRelRCReturn(rc, rc);
1568 }
1569
1570 pExecMemAllocator->idxChunkHint = 0;
1571
1572 return VINF_SUCCESS;
1573}
1574
1575
1576/*********************************************************************************************************************************
1577* Native Recompilation *
1578*********************************************************************************************************************************/
1579
1580
1581/**
1582 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1585{
1586 pVCpu->iem.s.cInstructions += idxInstr;
1587 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1588}
1589
1590
1591/**
1592 * Used by TB code when it wants to raise a \#GP(0).
1593 */
1594IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1595{
1596 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1597#ifndef _MSC_VER
1598 return VINF_IEM_RAISED_XCPT; /* not reached */
1599#endif
1600}
1601
1602
1603/**
1604 * Used by TB code when it wants to raise a \#NM.
1605 */
1606IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1607{
1608 iemRaiseDeviceNotAvailableJmp(pVCpu);
1609#ifndef _MSC_VER
1610 return VINF_IEM_RAISED_XCPT; /* not reached */
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code when it wants to raise a \#UD.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1619{
1620 iemRaiseUndefinedOpcodeJmp(pVCpu);
1621#ifndef _MSC_VER
1622 return VINF_IEM_RAISED_XCPT; /* not reached */
1623#endif
1624}
1625
1626
1627/**
1628 * Used by TB code when it wants to raise a \#MF.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1631{
1632 iemRaiseMathFaultJmp(pVCpu);
1633#ifndef _MSC_VER
1634 return VINF_IEM_RAISED_XCPT; /* not reached */
1635#endif
1636}
1637
1638
1639/**
1640 * Used by TB code when it wants to raise a \#XF.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1643{
1644 iemRaiseSimdFpExceptionJmp(pVCpu);
1645#ifndef _MSC_VER
1646 return VINF_IEM_RAISED_XCPT; /* not reached */
1647#endif
1648}
1649
1650
1651/**
1652 * Used by TB code when detecting opcode changes.
1653 * @see iemThreadeFuncWorkerObsoleteTb
1654 */
1655IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1656{
1657 /* We set fSafeToFree to false because we're being called in the context
1658 of a TB callback function, which for native TBs means we cannot release
1659 the executable memory until we've returned our way back to iemTbExec, as
1660 that return path goes via the native code generated for the TB. */
1661 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1662 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1663 return VINF_IEM_REEXEC_BREAK;
1664}
1665
1666
1667/**
1668 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1669 */
1670IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1671{
1672 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1673 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1674 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1675 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1676 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1677 return VINF_IEM_REEXEC_BREAK;
1678}
1679
1680
1681/**
1682 * Used by TB code when we missed a PC check after a branch.
1683 */
1684IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1685{
1686 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1687 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1688 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1689 pVCpu->iem.s.pbInstrBuf));
1690 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1691 return VINF_IEM_REEXEC_BREAK;
1692}
1693
1694
1695
1696/*********************************************************************************************************************************
1697* Helpers: Segmented memory fetches and stores. *
1698*********************************************************************************************************************************/
1699
1700/**
1701 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1706 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1707#else
1708 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/**
1714 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1715 * to 16 bits.
1716 */
1717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1718{
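    /* The cast chain sign-extends the byte to 16 bits and then zero-extends the 16-bit result to 64 bits. */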
1719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1720 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1721#else
1722 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1723#endif
1724}
1725
1726
1727/**
1728 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1729 * to 32 bits.
1730 */
1731IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1732{
1733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1734 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1735#else
1736 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1737#endif
1738}
1739
1740/**
1741 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1742 * to 64 bits.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1745{
1746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1747 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1748#else
1749 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1750#endif
1751}
1752
1753
1754/**
1755 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1760 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1761#else
1762 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1769 * to 32 bits.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1772{
1773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1774 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1775#else
1776 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1777#endif
1778}
1779
1780
1781/**
1782 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1783 * to 64 bits.
1784 */
1785IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1786{
1787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1788 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1789#else
1790 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1791#endif
1792}
1793
1794
1795/**
1796 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1799{
1800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1801 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1802#else
1803 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1804#endif
1805}
1806
1807
1808/**
1809 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1810 * to 64 bits.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1828 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1829#else
1830 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1831#endif
1832}
1833
1834
1835/**
1836 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1837 */
1838IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1839{
1840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1841 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1842#else
1843 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1844#endif
1845}
1846
1847
1848/**
1849 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1850 */
1851IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1852{
1853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1854 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1855#else
1856 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1857#endif
1858}
1859
1860
1861/**
1862 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1865{
1866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1867 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1868#else
1869 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1870#endif
1871}
1872
1873
1874/**
1875 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1880 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1881#else
1882 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1883#endif
1884}
1885
1886
1887
1888/**
1889 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1894 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1895#else
1896 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1897#endif
1898}
1899
1900
1901/**
1902 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1907 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1908#else
1909 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to store a 32-bit selector value onto a generic stack.
1916 *
1917 * Intel CPUs don't write a whole dword, thus the special function.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1922 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1923#else
1924 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1925#endif
1926}
1927
1928
1929/**
1930 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1935 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1936#else
1937 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1948 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1949#else
1950 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1957 */
1958IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1959{
1960#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1961 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1962#else
1963 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1964#endif
1965}
1966
1967
1968/**
1969 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1970 */
1971IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1972{
1973#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1974 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1975#else
1976 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1977#endif
1978}
1979
1980
1981
1982/*********************************************************************************************************************************
1983* Helpers: Flat memory fetches and stores. *
1984*********************************************************************************************************************************/
1985
1986/**
1987 * Used by TB code to load unsigned 8-bit data w/ flat address.
1988 * @note Zero extending the value to 64-bit to simplify assembly.
1989 */
1990IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1991{
1992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1993 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1994#else
1995 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1996#endif
1997}
1998
1999
2000/**
2001 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2002 * to 16 bits.
2003 * @note Zero extending the value to 64-bit to simplify assembly.
2004 */
2005IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2006{
2007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2009#else
2010 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2011#endif
2012}
2013
2014
2015/**
2016 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2017 * to 32 bits.
2018 * @note Zero extending the value to 64-bit to simplify assembly.
2019 */
2020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2021{
2022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2024#else
2025 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2026#endif
2027}
2028
2029
2030/**
2031 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2032 * to 64 bits.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2035{
2036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2037 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2038#else
2039 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2040#endif
2041}
2042
2043
2044/**
2045 * Used by TB code to load unsigned 16-bit data w/ flat address.
2046 * @note Zero extending the value to 64-bit to simplify assembly.
2047 */
2048IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2049{
2050#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2051 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2052#else
2053 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2054#endif
2055}
2056
2057
2058/**
2059 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2060 * to 32 bits.
2061 * @note Zero extending the value to 64-bit to simplify assembly.
2062 */
2063IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2064{
2065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2067#else
2068 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2069#endif
2070}
2071
2072
2073/**
2074 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2075 * to 64 bits.
2076 * @note Zero extending the value to 64-bit to simplify assembly.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2081 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2082#else
2083 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to load unsigned 32-bit data w/ flat address.
2090 * @note Zero extending the value to 64-bit to simplify assembly.
2091 */
2092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2093{
2094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2095 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2096#else
2097 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2098#endif
2099}
2100
2101
2102/**
2103 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2104 * to 64 bits.
2105 * @note Zero extending the value to 64-bit to simplify assembly.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2110 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2111#else
2112 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to load unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2123 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2124#else
2125 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2126#endif
2127}
2128
2129
2130/**
2131 * Used by TB code to store unsigned 8-bit data w/ flat address.
2132 */
2133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2134{
2135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2136 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2137#else
2138 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2139#endif
2140}
2141
2142
2143/**
2144 * Used by TB code to store unsigned 16-bit data w/ flat address.
2145 */
2146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2147{
2148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2149 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2150#else
2151 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2152#endif
2153}
2154
2155
2156/**
2157 * Used by TB code to store unsigned 32-bit data w/ flat address.
2158 */
2159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2160{
2161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2162 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2163#else
2164 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2165#endif
2166}
2167
2168
2169/**
2170 * Used by TB code to store unsigned 64-bit data w/ flat address.
2171 */
2172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2173{
2174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2175 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2176#else
2177 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2178#endif
2179}
2180
2181
2182
2183/**
2184 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2189 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2190#else
2191 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2202 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2203#else
2204 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2205#endif
2206}
2207
2208
2209/**
2210 * Used by TB code to store a segment selector value onto a flat stack.
2211 *
2212 * Intel CPUs don't write a whole dword, thus the special function.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2217 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2218#else
2219 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2220#endif
2221}
2222
2223
2224/**
2225 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2226 */
2227IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2228{
2229#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2230 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2231#else
2232 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2233#endif
2234}
2235
2236
2237/**
2238 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2239 */
2240IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2241{
2242#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2243 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2244#else
2245 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2246#endif
2247}
2248
2249
2250/**
2251 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2252 */
2253IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2254{
2255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2256 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2257#else
2258 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2259#endif
2260}
2261
2262
2263/**
2264 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2269 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2270#else
2271 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2272#endif
2273}
2274
2275
2276
2277/*********************************************************************************************************************************
2278* Helpers: Segmented memory mapping. *
2279*********************************************************************************************************************************/
2280
2281/**
2282 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2283 * segmentation.
2284 */
2285IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2286 RTGCPTR GCPtrMem, uint8_t iSegReg))
2287{
2288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2289 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#else
2291 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#endif
2293}
2294
2295
2296/**
2297 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2298 */
2299IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2300 RTGCPTR GCPtrMem, uint8_t iSegReg))
2301{
2302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2303 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#else
2305 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#endif
2307}
2308
2309
2310/**
2311 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2312 */
2313IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2314 RTGCPTR GCPtrMem, uint8_t iSegReg))
2315{
2316#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2317 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2318#else
2319 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2320#endif
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2328 RTGCPTR GCPtrMem, uint8_t iSegReg))
2329{
2330#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2331 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2332#else
2333 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2334#endif
2335}
2336
2337
2338/**
2339 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2340 * segmentation.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2343 RTGCPTR GCPtrMem, uint8_t iSegReg))
2344{
2345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2346 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#else
2348 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#endif
2350}
2351
2352
2353/**
2354 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2355 */
2356IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2357 RTGCPTR GCPtrMem, uint8_t iSegReg))
2358{
2359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2360 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#else
2362 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#endif
2364}
2365
2366
2367/**
2368 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2369 */
2370IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2371 RTGCPTR GCPtrMem, uint8_t iSegReg))
2372{
2373#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2374 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2375#else
2376 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2377#endif
2378}
2379
2380
2381/**
2382 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2383 */
2384IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2385 RTGCPTR GCPtrMem, uint8_t iSegReg))
2386{
2387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2388 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2389#else
2390 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2391#endif
2392}
2393
2394
2395/**
2396 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2397 * segmentation.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2400 RTGCPTR GCPtrMem, uint8_t iSegReg))
2401{
2402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2403 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#else
2405 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#endif
2407}
2408
2409
2410/**
2411 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2412 */
2413IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2414 RTGCPTR GCPtrMem, uint8_t iSegReg))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2417 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#else
2419 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2426 */
2427IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2428 RTGCPTR GCPtrMem, uint8_t iSegReg))
2429{
2430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2431 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2432#else
2433 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2434#endif
2435}
2436
2437
2438/**
2439 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2440 */
2441IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2442 RTGCPTR GCPtrMem, uint8_t iSegReg))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2445 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2446#else
2447 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2454 * segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/**
2468 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2469 */
2470IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2471 RTGCPTR GCPtrMem, uint8_t iSegReg))
2472{
2473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2474 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#else
2476 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#endif
2478}
2479
2480
2481/**
2482 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2483 */
2484IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2485 RTGCPTR GCPtrMem, uint8_t iSegReg))
2486{
2487#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2488 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#else
2490 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#endif
2492}
2493
2494
2495/**
2496 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2497 */
2498IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2499 RTGCPTR GCPtrMem, uint8_t iSegReg))
2500{
2501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2502 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#else
2504 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#endif
2506}
2507
2508
2509/**
2510 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2511 */
2512IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2513 RTGCPTR GCPtrMem, uint8_t iSegReg))
2514{
2515#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2516 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2517#else
2518 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2519#endif
2520}
2521
2522
2523/**
2524 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2525 */
2526IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2527 RTGCPTR GCPtrMem, uint8_t iSegReg))
2528{
2529#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2530 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2531#else
2532 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2533#endif
2534}
2535
2536
2537/**
2538 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2539 * segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/**
2567 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2568 */
2569IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2570 RTGCPTR GCPtrMem, uint8_t iSegReg))
2571{
2572#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2573 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2574#else
2575 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2576#endif
2577}
2578
2579
2580/**
2581 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2582 */
2583IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2584 RTGCPTR GCPtrMem, uint8_t iSegReg))
2585{
2586#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2587 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2588#else
2589 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2590#endif
2591}
2592
2593
2594/*********************************************************************************************************************************
2595* Helpers: Flat memory mapping. *
2596*********************************************************************************************************************************/
2597
2598/**
2599 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2600 * address.
2601 */
2602IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2603{
2604#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2605 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2606#else
2607 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2608#endif
2609}
2610
2611
2612/**
2613 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2614 */
2615IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2616{
2617#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2618 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2619#else
2620 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2621#endif
2622}
2623
2624
2625/**
2626 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2627 */
2628IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2629{
2630#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2631 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2632#else
2633 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2634#endif
2635}
2636
2637
2638/**
2639 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2640 */
2641IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2642{
2643#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2644 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2645#else
2646 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2647#endif
2648}
2649
2650
2651/**
2652 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2653 * address.
2654 */
2655IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2656{
2657#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2658 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2659#else
2660 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2661#endif
2662}
2663
2664
2665/**
2666 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2667 */
2668IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2672#else
2673 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2682{
2683#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2684 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2685#else
2686 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2687#endif
2688}
2689
2690
2691/**
2692 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2693 */
2694IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2695{
2696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2697 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2698#else
2699 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2700#endif
2701}
2702
2703
2704/**
2705 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2706 * address.
2707 */
2708IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2709{
2710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2711 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2712#else
2713 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2714#endif
2715}
2716
2717
2718/**
2719 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2722{
2723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2724 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2725#else
2726 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2727#endif
2728}
2729
2730
2731/**
2732 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2733 */
2734IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2735{
2736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2737 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2738#else
2739 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2740#endif
2741}
2742
2743
2744/**
2745 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2746 */
2747IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2748{
2749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2750 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2751#else
2752 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2753#endif
2754}
2755
2756
2757/**
2758 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2759 * address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2786 */
2787IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2788{
2789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2790 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2791#else
2792 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2793#endif
2794}
2795
2796
2797/**
2798 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2799 */
2800IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2801{
2802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2803 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2804#else
2805 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2806#endif
2807}
2808
2809
2810/**
2811 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2812 */
2813IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2814{
2815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2816 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2817#else
2818 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2819#endif
2820}
2821
2822
2823/**
2824 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2825 */
2826IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2827{
2828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2829 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2830#else
2831 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2832#endif
2833}
2834
2835
2836/**
2837 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2838 * address.
2839 */
2840IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2841{
2842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2843 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2844#else
2845 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2846#endif
2847}
2848
2849
2850/**
2851 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2857#else
2858 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2867{
2868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2869 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2870#else
2871 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2872#endif
2873}
2874
2875
2876/**
2877 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2878 */
2879IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2880{
2881#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2882 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2883#else
2884 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2885#endif
2886}
2887
2888
2889/*********************************************************************************************************************************
2890* Helpers: Commit, rollback & unmap *
2891*********************************************************************************************************************************/
2892
2893/**
2894 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2895 */
2896IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2897{
2898 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2899}
2900
2901
2902/**
2903 * Used by TB code to commit and unmap a read-write memory mapping.
2904 */
2905IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2906{
2907 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2908}
2909
2910
2911/**
2912 * Used by TB code to commit and unmap a write-only memory mapping.
2913 */
2914IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2915{
2916 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2917}
2918
2919
2920/**
2921 * Used by TB code to commit and unmap a read-only memory mapping.
2922 */
2923IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2924{
2925 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2926}
2927
2928
2929/**
2930 * Reinitializes the native recompiler state.
2931 *
2932 * Called before starting a new recompile job.
2933 */
2934static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2935{
2936 pReNative->cLabels = 0;
2937 pReNative->bmLabelTypes = 0;
2938 pReNative->cFixups = 0;
2939#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2940 pReNative->pDbgInfo->cEntries = 0;
2941#endif
2942 pReNative->pTbOrg = pTb;
2943 pReNative->cCondDepth = 0;
2944 pReNative->uCondSeqNo = 0;
2945 pReNative->uCheckIrqSeqNo = 0;
2946 pReNative->uTlbSeqNo = 0;
2947
2948#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2949 pReNative->Core.offPc = 0;
2950 pReNative->Core.cInstrPcUpdateSkipped = 0;
2951#endif
2952 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2953#if IEMNATIVE_HST_GREG_COUNT < 32
2954 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2955#endif
2956 ;
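    /* bmHstRegs starts out with the fixed registers and any register indexes at or above
       IEMNATIVE_HST_GREG_COUNT marked as in use. */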
2957 pReNative->Core.bmHstRegsWithGstShadow = 0;
2958 pReNative->Core.bmGstRegShadows = 0;
2959 pReNative->Core.bmVars = 0;
2960 pReNative->Core.bmStack = 0;
2961 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2962 pReNative->Core.u64ArgVars = UINT64_MAX;
2963
2964 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
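    /* All unique (single-instance) label indexes start out undefined (UINT32_MAX). */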
2965 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2968 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2969 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2970 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2971 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2972 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2973 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2974 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2975 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2976 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2977 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2978
2979 /* Full host register reinit: */
2980 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2981 {
2982 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2983 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2984 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2985 }
2986
2987 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2988 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2989#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2990 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2991#endif
2992#ifdef IEMNATIVE_REG_FIXED_TMP0
2993 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2994#endif
2995#ifdef IEMNATIVE_REG_FIXED_TMP1
2996 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2997#endif
2998#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2999 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3000#endif
3001 );
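    /* Mark any remaining fixed registers as reserved; the specifically assigned ones get their
       proper roles set just below. */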
3002 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3003 {
3004 fRegs &= ~RT_BIT_32(idxReg);
3005 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3006 }
3007
3008 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3009#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3010 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3011#endif
3012#ifdef IEMNATIVE_REG_FIXED_TMP0
3013 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3014#endif
3015#ifdef IEMNATIVE_REG_FIXED_TMP1
3016 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3017#endif
3018#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3019 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3020#endif
3021
3022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3023# ifdef RT_ARCH_ARM64
3024 /*
3025 * ARM64 only has 32 128-bit registers. In order to support emulating 256-bit registers we statically
3026 * pair two real registers into one virtual register for now, leaving us with only 16 256-bit registers.
3027 * We always pair v0 with v1, v2 with v3, etc., so we mark the higher register of each pair as fixed here
3028 * during init and the register allocator assumes it is always free when the lower one is picked.
3029 */
3030 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
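    /* 0xaaaaaaaa has bits 1, 3, 5, ..., 31 set, i.e. the odd (higher) register of every pair. */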
3031# else
3032 uint32_t const fFixedAdditional = 0;
3033# endif
3034
3035 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3036 | fFixedAdditional
3037# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3038 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3039# endif
3040 ;
3041 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3042 pReNative->Core.bmGstSimdRegShadows = 0;
3043 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3044 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3045
3046 /* Full host register reinit: */
3047 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3048 {
3049 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3050 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3051 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3052 }
3053
3054 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3055 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3056 {
3057 fRegs &= ~RT_BIT_32(idxReg);
3058 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3059 }
3060
3061#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3062 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3063#endif
3064
3065#endif
3066
3067 return pReNative;
3068}
3069
3070
3071/**
3072 * Allocates and initializes the native recompiler state.
3073 *
3074 * This is called the first time an EMT wants to recompile something.
3075 *
3076 * @returns Pointer to the new recompiler state.
3077 * @param pVCpu The cross context virtual CPU structure of the calling
3078 * thread.
3079 * @param pTb The TB that's about to be recompiled.
3080 * @thread EMT(pVCpu)
3081 */
3082static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3083{
3084 VMCPU_ASSERT_EMT(pVCpu);
3085
3086 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3087 AssertReturn(pReNative, NULL);
3088
3089 /*
3090 * Try allocate all the buffers and stuff we need.
3091 */
3092 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3093 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3094 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3095#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3096 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3097#endif
3098 if (RT_LIKELY( pReNative->pInstrBuf
3099 && pReNative->paLabels
3100 && pReNative->paFixups)
3101#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3102 && pReNative->pDbgInfo
3103#endif
3104 )
3105 {
3106 /*
3107 * Set the buffer & array sizes on success.
3108 */
3109 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3110 pReNative->cLabelsAlloc = _8K;
3111 pReNative->cFixupsAlloc = _16K;
3112#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3113 pReNative->cDbgInfoAlloc = _16K;
3114#endif
3115
3116 /* Other constant stuff: */
3117 pReNative->pVCpu = pVCpu;
3118
3119 /*
3120 * Done, just need to save it and reinit it.
3121 */
3122 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3123 return iemNativeReInit(pReNative, pTb);
3124 }
3125
3126 /*
3127 * Failed. Cleanup and return.
3128 */
3129 AssertFailed();
3130 RTMemFree(pReNative->pInstrBuf);
3131 RTMemFree(pReNative->paLabels);
3132 RTMemFree(pReNative->paFixups);
3133#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3134 RTMemFree(pReNative->pDbgInfo);
3135#endif
3136 RTMemFree(pReNative);
3137 return NULL;
3138}
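
/*
 * Note / usage sketch (illustrative only): the expected calling pattern is to create the state
 * lazily on the first recompilation for an EMT and reuse it afterwards, roughly like this
 * (simplified; the real call site lives in the TB recompile entry point):
 *
 *     PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *     if (RT_LIKELY(pReNative))
 *         pReNative = iemNativeReInit(pReNative, pTb);
 *     else
 *         pReNative = iemNativeInit(pVCpu, pTb); // allocates the buffers and calls iemNativeReInit
 */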
3139
3140
3141/**
3142 * Creates a label.
3143 *
3144 * If the label does not yet have a defined position,
3145 * call iemNativeLabelDefine() later to set it.
3146 *
3147 * @returns Label ID. Throws VBox status code on failure, so no need to check
3148 * the return value.
3149 * @param pReNative The native recompile state.
3150 * @param enmType The label type.
3151 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3152 * label is not yet defined (default).
3153 * @param   uData       Data associated with the label. Only applicable to
3154 *                      certain types of labels. Default is zero.
3155 */
3156DECL_HIDDEN_THROW(uint32_t)
3157iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3158 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3159{
3160 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3161
3162 /*
3163 * Locate existing label definition.
3164 *
3165 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3166 * and uData is zero.
3167 */
3168 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3169 uint32_t const cLabels = pReNative->cLabels;
3170 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3171#ifndef VBOX_STRICT
3172 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3173 && offWhere == UINT32_MAX
3174 && uData == 0
3175#endif
3176 )
3177 {
3178#ifndef VBOX_STRICT
3179 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3180 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3181 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3182 if (idxLabel < pReNative->cLabels)
3183 return idxLabel;
3184#else
3185 for (uint32_t i = 0; i < cLabels; i++)
3186 if ( paLabels[i].enmType == enmType
3187 && paLabels[i].uData == uData)
3188 {
3189 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3190 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3191 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3192 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3193 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3194 return i;
3195 }
3196 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3197 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3198#endif
3199 }
3200
3201 /*
3202 * Make sure we've got room for another label.
3203 */
3204 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3205 { /* likely */ }
3206 else
3207 {
3208 uint32_t cNew = pReNative->cLabelsAlloc;
3209 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3210 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3211 cNew *= 2;
3212        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3213 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3214 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3215 pReNative->paLabels = paLabels;
3216 pReNative->cLabelsAlloc = cNew;
3217 }
3218
3219 /*
3220 * Define a new label.
3221 */
3222 paLabels[cLabels].off = offWhere;
3223 paLabels[cLabels].enmType = enmType;
3224 paLabels[cLabels].uData = uData;
3225 pReNative->cLabels = cLabels + 1;
3226
3227 Assert((unsigned)enmType < 64);
3228 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3229
3230 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3231 {
3232 Assert(uData == 0);
3233 pReNative->aidxUniqueLabels[enmType] = cLabels;
3234 }
3235
3236 if (offWhere != UINT32_MAX)
3237 {
3238#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3239 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3240 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3241#endif
3242 }
3243 return cLabels;
3244}
3245
3246
3247/**
3248 * Defines the location of an existing label.
3249 *
3250 * @param pReNative The native recompile state.
3251 * @param idxLabel The label to define.
3252 * @param offWhere The position.
3253 */
3254DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3255{
3256 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3257 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3258 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3259 pLabel->off = offWhere;
3260#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3261 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3262 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3263#endif
3264}
3265
3266
3267/**
3268 * Looks up a label.
3269 *
3270 * @returns Label ID if found, UINT32_MAX if not.
3271 */
3272static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3273 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3274{
3275 Assert((unsigned)enmType < 64);
3276 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3277 {
3278 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3279 return pReNative->aidxUniqueLabels[enmType];
3280
3281 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3282 uint32_t const cLabels = pReNative->cLabels;
3283 for (uint32_t i = 0; i < cLabels; i++)
3284 if ( paLabels[i].enmType == enmType
3285 && paLabels[i].uData == uData
3286 && ( paLabels[i].off == offWhere
3287 || offWhere == UINT32_MAX
3288 || paLabels[i].off == UINT32_MAX))
3289 return i;
3290 }
3291 return UINT32_MAX;
3292}
3293
3294
3295/**
3296 * Adds a fixup.
3297 *
3298 * @throws VBox status code (int) on failure.
3299 * @param pReNative The native recompile state.
3300 * @param offWhere The instruction offset of the fixup location.
3301 * @param idxLabel The target label ID for the fixup.
3302 * @param enmType The fixup type.
3303 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3304 */
3305DECL_HIDDEN_THROW(void)
3306iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3307 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3308{
3309 Assert(idxLabel <= UINT16_MAX);
3310 Assert((unsigned)enmType <= UINT8_MAX);
3311
3312 /*
3313 * Make sure we've room.
3314 */
3315 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3316 uint32_t const cFixups = pReNative->cFixups;
3317 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3318 { /* likely */ }
3319 else
3320 {
3321 uint32_t cNew = pReNative->cFixupsAlloc;
3322 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3323 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3324 cNew *= 2;
3325 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3326 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3327 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3328 pReNative->paFixups = paFixups;
3329 pReNative->cFixupsAlloc = cNew;
3330 }
3331
3332 /*
3333 * Add the fixup.
3334 */
3335 paFixups[cFixups].off = offWhere;
3336 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3337 paFixups[cFixups].enmType = enmType;
3338 paFixups[cFixups].offAddend = offAddend;
3339 pReNative->cFixups = cFixups + 1;
3340}
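
/*
 * Usage sketch (hypothetical emitter code, illustrative only): a forward branch is typically
 * produced by creating a label with no position yet, recording a fixup at the branch
 * instruction, and defining the label once the target offset is known.  The enmLabelType /
 * enmFixupType values and the branch emitter are placeholders here; the recorded fixups are
 * applied when the translation block is finalized.
 *
 *     uint32_t const idxLabel  = iemNativeLabelCreate(pReNative, enmLabelType);  // offWhere=UINT32_MAX
 *     uint32_t const offBranch = off;
 *     off = <emit branch instruction at off>;
 *     iemNativeAddFixup(pReNative, offBranch, idxLabel, enmFixupType);
 *     ...
 *     iemNativeLabelDefine(pReNative, idxLabel, off);   // the branch target is here
 */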
3341
3342
3343/**
3344 * Slow code path for iemNativeInstrBufEnsure.
3345 */
3346DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3347{
3348 /* Double the buffer size till we meet the request. */
3349 uint32_t cNew = pReNative->cInstrBufAlloc;
3350 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3351 do
3352 cNew *= 2;
3353 while (cNew < off + cInstrReq);
3354
3355 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3356#ifdef RT_ARCH_ARM64
3357 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3358#else
3359 uint32_t const cbMaxInstrBuf = _2M;
3360#endif
3361 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3362
3363 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3364 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3365
3366#ifdef VBOX_STRICT
3367 pReNative->offInstrBufChecked = off + cInstrReq;
3368#endif
3369 pReNative->cInstrBufAlloc = cNew;
3370 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3371}
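
/*
 * Usage sketch (illustrative only): emitters call the inline fast path iemNativeInstrBufEnsure()
 * before writing instruction units; it only falls back to the slow path above when the buffer
 * has to grow.
 *
 *     PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2); // room for 2 units
 *     pCodeBuf[off++] = <first native instruction unit>;
 *     pCodeBuf[off++] = <second native instruction unit>;
 */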
3372
3373#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3374
3375/**
3376 * Grows the static debug info array used during recompilation.
3377 *
3378 * @returns Pointer to the new debug info block; throws VBox status code on
3379 * failure, so no need to check the return value.
3380 */
3381DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3382{
3383 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3384 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3385 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3386 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3387 pReNative->pDbgInfo = pDbgInfo;
3388 pReNative->cDbgInfoAlloc = cNew;
3389 return pDbgInfo;
3390}
3391
3392
3393/**
3394 * Adds a new uninitialized debug info entry, returning the pointer to it.
3395 */
3396DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3397{
3398 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3399 { /* likely */ }
3400 else
3401 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3402 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3403}
3404
3405
3406/**
3407 * Debug Info: Adds a native offset record, if necessary.
3408 */
3409static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3410{
3411 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3412
3413 /*
3414 * Search backwards to see if we've got a similar record already.
3415 */
3416 uint32_t idx = pDbgInfo->cEntries;
3417 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3418 while (idx-- > idxStop)
3419 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3420 {
3421 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3422 return;
3423 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3424 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3425 break;
3426 }
3427
3428 /*
3429 * Add it.
3430 */
3431 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3432 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3433 pEntry->NativeOffset.offNative = off;
3434}
3435
3436
3437/**
3438 * Debug Info: Record info about a label.
3439 */
3440static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3441{
3442 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3443 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3444 pEntry->Label.uUnused = 0;
3445 pEntry->Label.enmLabel = (uint8_t)enmType;
3446 pEntry->Label.uData = uData;
3447}
3448
3449
3450/**
3451 * Debug Info: Record info about a threaded call.
3452 */
3453static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3454{
3455 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3456 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3457 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3458 pEntry->ThreadedCall.uUnused = 0;
3459 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3460}
3461
3462
3463/**
3464 * Debug Info: Record info about a new guest instruction.
3465 */
3466static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3467{
3468 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3469 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3470 pEntry->GuestInstruction.uUnused = 0;
3471 pEntry->GuestInstruction.fExec = fExec;
3472}
3473
3474
3475/**
3476 * Debug Info: Record info about guest register shadowing.
3477 */
3478static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3479 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3480{
3481 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3482 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3483 pEntry->GuestRegShadowing.uUnused = 0;
3484 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3485 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3486 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3487}
3488
3489
3490# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3491/**
3492 * Debug Info: Record info about guest SIMD register shadowing.
3493 */
3494static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3495 uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX)
3496{
3497 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3498 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3499 pEntry->GuestSimdRegShadowing.uUnused = 0;
3500 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3501 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3502 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3503}
3504# endif
3505
3506
3507# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3508/**
3509 * Debug Info: Record info about delayed RIP updates.
3510 */
3511static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3512{
3513 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3514 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3515 pEntry->DelayedPcUpdate.offPc = offPc;
3516 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3517}
3518# endif
3519
3520#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3521
3522
3523/*********************************************************************************************************************************
3524* Register Allocator *
3525*********************************************************************************************************************************/
3526
3527/**
3528 * Register parameter indexes (indexed by argument number).
3529 */
3530DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3531{
3532 IEMNATIVE_CALL_ARG0_GREG,
3533 IEMNATIVE_CALL_ARG1_GREG,
3534 IEMNATIVE_CALL_ARG2_GREG,
3535 IEMNATIVE_CALL_ARG3_GREG,
3536#if defined(IEMNATIVE_CALL_ARG4_GREG)
3537 IEMNATIVE_CALL_ARG4_GREG,
3538# if defined(IEMNATIVE_CALL_ARG5_GREG)
3539 IEMNATIVE_CALL_ARG5_GREG,
3540# if defined(IEMNATIVE_CALL_ARG6_GREG)
3541 IEMNATIVE_CALL_ARG6_GREG,
3542# if defined(IEMNATIVE_CALL_ARG7_GREG)
3543 IEMNATIVE_CALL_ARG7_GREG,
3544# endif
3545# endif
3546# endif
3547#endif
3548};
3549
3550/**
3551 * Call register masks indexed by argument count.
3552 */
3553DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3554{
3555 0,
3556 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3557 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3559 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3560 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3561#if defined(IEMNATIVE_CALL_ARG4_GREG)
3562 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3563 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3564# if defined(IEMNATIVE_CALL_ARG5_GREG)
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3567# if defined(IEMNATIVE_CALL_ARG6_GREG)
3568 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3569 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3570 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3571# if defined(IEMNATIVE_CALL_ARG7_GREG)
3572 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3573 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3574 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3575# endif
3576# endif
3577# endif
3578#endif
3579};
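
/*
 * For example, g_afIemNativeCallRegs[3] is the mask of IEMNATIVE_CALL_ARG0_GREG thru
 * IEMNATIVE_CALL_ARG2_GREG, i.e. the host registers that will carry the first three
 * arguments of a helper call.
 */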
3580
3581#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3582/**
3583 * BP offset of the stack argument slots.
3584 *
3585 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3586 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3587 */
3588DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3589{
3590 IEMNATIVE_FP_OFF_STACK_ARG0,
3591# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3592 IEMNATIVE_FP_OFF_STACK_ARG1,
3593# endif
3594# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3595 IEMNATIVE_FP_OFF_STACK_ARG2,
3596# endif
3597# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3598 IEMNATIVE_FP_OFF_STACK_ARG3,
3599# endif
3600};
3601AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3602#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3603
3604/**
3605 * Info about shadowed guest register values.
3606 * @see IEMNATIVEGSTREG
3607 */
3608static struct
3609{
3610 /** Offset in VMCPU. */
3611 uint32_t off;
3612 /** The field size. */
3613 uint8_t cb;
3614 /** Name (for logging). */
3615 const char *pszName;
3616} const g_aGstShadowInfo[] =
3617{
3618#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3620 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3621 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3622 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3623 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3624 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3625 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3626 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3627 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3628 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3629 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3630 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3631 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3632 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3633 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3634 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3635 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3636 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3637 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3638 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3639 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3640 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3641 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3642 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3643 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3644 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3645 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3646 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3647 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3648 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3649 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3650 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3651 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3652 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3653 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3654 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3655 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3656 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3657 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3658 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3659 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3660 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3661 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3662 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3663 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3664 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3665 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3666 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3667#undef CPUMCTX_OFF_AND_SIZE
3668};
3669AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
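
/*
 * For example, g_aGstShadowInfo[kIemNativeGstReg_Pc].off is the offset of cpum.GstCtx.rip within
 * VMCPU and .cb its size; the register allocator uses these when emitting guest register loads
 * and stores, while .pszName is only used for logging.
 */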
3670
3671
3672/** Host CPU general purpose register names. */
3673DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3674{
3675#ifdef RT_ARCH_AMD64
3676 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3677#elif defined(RT_ARCH_ARM64)
3678 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3679 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3680#else
3681# error "port me"
3682#endif
3683};
3684
3685
3686DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3687 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3688{
3689 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3690
3691 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3692 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3693 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3694 return (uint8_t)idxReg;
3695}
3696
3697
3698#if 0 /* unused */
3699/**
3700 * Tries to locate a suitable register in the given register mask.
3701 *
3702 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3703 * failed.
3704 *
3705 * @returns Host register number on success, returns UINT8_MAX on failure.
3706 */
3707static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3708{
3709 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3710 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3711 if (fRegs)
3712 {
3713 /** @todo pick better here: */
3714 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3715
3716 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3717 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3718 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3719 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3720
3721 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3722 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3723 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3724 return idxReg;
3725 }
3726 return UINT8_MAX;
3727}
3728#endif /* unused */
3729
3730
3731/**
3732 * Locate a register, possibly freeing one up.
3733 *
3734 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3735 * failed.
3736 *
3737 * @returns Host register number on success. Returns UINT8_MAX if no registers
3738 *          were found; the caller is supposed to deal with this and raise an
3739 *          allocation type specific status code (if desired).
3740 *
3741 * @throws  VBox status code if we run into trouble spilling a variable or
3742 *          recording debug info. Does NOT throw anything if we're out of
3743 * registers, though.
3744 */
3745static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3746 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3747{
3748 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3749 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3750 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3751
3752 /*
3753 * Try a freed register that's shadowing a guest register.
3754 */
3755 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3756 if (fRegs)
3757 {
3758 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3759
3760#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3761 /*
3762         * When we have liveness information, we use it to kick out all shadowed
3763         * guest registers that will not be needed any more in this TB.  If we're
3764 * lucky, this may prevent us from ending up here again.
3765 *
3766 * Note! We must consider the previous entry here so we don't free
3767 * anything that the current threaded function requires (current
3768 * entry is produced by the next threaded function).
3769 */
3770 uint32_t const idxCurCall = pReNative->idxCurCall;
3771 if (idxCurCall > 0)
3772 {
3773 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3774
3775# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3776 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3777 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3778            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3779# else
3780            /* Construct a mask of the registers not in the read or write state.
3781               Note! We could skip writes, if they aren't from us, as this is just
3782                     a hack to prevent trashing registers that have just been written
3783                     or will be written when we retire the current instruction. */
3784 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3785 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3786 & IEMLIVENESSBIT_MASK;
3787# endif
3788 /* Merge EFLAGS. */
3789 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3790 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3791 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3792 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3793 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3794
3795 /* If it matches any shadowed registers. */
3796 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3797 {
3798 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3799 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3800 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3801
3802 /* See if we've got any unshadowed registers we can return now. */
3803 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3804 if (fUnshadowedRegs)
3805 {
3806 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3807 return (fPreferVolatile
3808 ? ASMBitFirstSetU32(fUnshadowedRegs)
3809 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3810 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3811 - 1;
3812 }
3813 }
3814 }
3815#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3816
3817 unsigned const idxReg = (fPreferVolatile
3818 ? ASMBitFirstSetU32(fRegs)
3819 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3820 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3821 - 1;
3822
3823 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3824 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3825 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3826 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3827
3828 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3829 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3830 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3831 return idxReg;
3832 }
3833
3834 /*
3835 * Try free up a variable that's in a register.
3836 *
3837 * We do two rounds here, first evacuating variables we don't need to be
3838 * saved on the stack, then in the second round move things to the stack.
3839 */
3840 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3841 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3842 {
3843 uint32_t fVars = pReNative->Core.bmVars;
3844 while (fVars)
3845 {
3846 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3847 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3848 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3849 && (RT_BIT_32(idxReg) & fRegMask)
3850 && ( iLoop == 0
3851 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3852 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3853 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3854 {
3855 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3856 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3857 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3858 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3859 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3860 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3861
3862 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3863 {
3864 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3865 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3866 }
3867
3868 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3869 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3870
3871 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3872 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3873 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3874 return idxReg;
3875 }
3876 fVars &= ~RT_BIT_32(idxVar);
3877 }
3878 }
3879
3880 return UINT8_MAX;
3881}
3882
3883
3884/**
3885 * Reassigns a variable to a different register specified by the caller.
3886 *
3887 * @returns The new code buffer position.
3888 * @param pReNative The native recompile state.
3889 * @param off The current code buffer position.
3890 * @param idxVar The variable index.
3891 * @param idxRegOld The old host register number.
3892 * @param idxRegNew The new host register number.
3893 * @param pszCaller The caller for logging.
3894 */
3895static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3896 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3897{
3898 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3899 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3900 RT_NOREF(pszCaller);
3901
3902 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3903
3904 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3905 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3906 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3908
3909 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3910 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3911 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3912 if (fGstRegShadows)
3913 {
3914 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3915 | RT_BIT_32(idxRegNew);
3916 while (fGstRegShadows)
3917 {
3918 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3919 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3920
3921 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3922 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3923 }
3924 }
3925
3926 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3927 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3928 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3929 return off;
3930}
3931
3932
3933/**
3934 * Moves a variable to a different register or spills it onto the stack.
3935 *
3936 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3937 * kinds can easily be recreated if needed later.
3938 *
3939 * @returns The new code buffer position.
3940 * @param pReNative The native recompile state.
3941 * @param off The current code buffer position.
3942 * @param idxVar The variable index.
3943 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3944 * call-volatile registers.
3945 */
3946static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3947 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3948{
3949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3950 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3951 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3952 Assert(!pVar->fRegAcquired);
3953
3954 uint8_t const idxRegOld = pVar->idxReg;
3955 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3956 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3957 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3958 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3959 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3960 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3961 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3962 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3963
3964
3965 /** @todo Add statistics on this.*/
3966 /** @todo Implement basic variable liveness analysis (python) so variables
3967     * can be freed immediately once no longer used.  Otherwise we risk trashing
3968     * registers and stack slots on dead variables.
3969 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3970
3971 /*
3972 * First try move it to a different register, as that's cheaper.
3973 */
3974 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3975 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3976 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3977 if (fRegs)
3978 {
3979 /* Avoid using shadow registers, if possible. */
3980 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3981 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3982 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3983 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3984 }
3985
3986 /*
3987 * Otherwise we must spill the register onto the stack.
3988 */
3989 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3990 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3991 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3992 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3993
3994 pVar->idxReg = UINT8_MAX;
3995 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3996 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3997 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3998 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3999 return off;
4000}
4001
4002
4003/**
4004 * Allocates a temporary host general purpose register.
4005 *
4006 * This may emit code to save register content onto the stack in order to free
4007 * up a register.
4008 *
4009 * @returns The host register number; throws VBox status code on failure,
4010 * so no need to check the return value.
4011 * @param pReNative The native recompile state.
4012 * @param poff Pointer to the variable with the code buffer position.
4013 * This will be update if we need to move a variable from
4014 *                          This will be updated if we need to move a variable from
4015 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4016 * registers (@c true, default) or the other way around
4017 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4018 */
4019DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4020{
4021 /*
4022 * Try find a completely unused register, preferably a call-volatile one.
4023 */
4024 uint8_t idxReg;
4025 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4026 & ~pReNative->Core.bmHstRegsWithGstShadow
4027 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4028 if (fRegs)
4029 {
4030 if (fPreferVolatile)
4031 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4032 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4033 else
4034 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4035 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4036 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4037 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4038 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4039 }
4040 else
4041 {
4042 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4043 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4044 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4045 }
4046 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4047}
4048
4049
4050/**
4051 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
4052 * registers.
4053 *
4054 * @returns The host register number; throws VBox status code on failure,
4055 * so no need to check the return value.
4056 * @param pReNative The native recompile state.
4057 * @param poff Pointer to the variable with the code buffer position.
4058 *                          This will be updated if we need to move a variable from
4059 * register to stack in order to satisfy the request.
4060 * @param fRegMask Mask of acceptable registers.
4061 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4062 * registers (@c true, default) or the other way around
4063 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4064 */
4065DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4066 bool fPreferVolatile /*= true*/)
4067{
4068 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4069 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4070
4071 /*
4072 * Try find a completely unused register, preferably a call-volatile one.
4073 */
4074 uint8_t idxReg;
4075 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4076 & ~pReNative->Core.bmHstRegsWithGstShadow
4077 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4078 & fRegMask;
4079 if (fRegs)
4080 {
4081 if (fPreferVolatile)
4082 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4083 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4084 else
4085 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4086 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4087 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4088 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4089 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4090 }
4091 else
4092 {
4093 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4094 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4095 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4096 }
4097 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4098}
4099
4100
4101/**
4102 * Allocates a temporary register for loading an immediate value into.
4103 *
4104 * This will emit code to load the immediate, unless there happens to be an
4105 * unused register with the value already loaded.
4106 *
4107 * The caller must not modify the returned register; it is to be considered
4108 * read-only. Free it using iemNativeRegFreeTmpImm.
4109 *
4110 * @returns The host register number; throws VBox status code on failure, so no
4111 * need to check the return value.
4112 * @param pReNative The native recompile state.
4113 * @param poff Pointer to the variable with the code buffer position.
4114 * @param uImm The immediate value that the register must hold upon
4115 * return.
4116 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4117 * registers (@c true, default) or the other way around
4118 * (@c false).
4119 *
4120 * @note Reusing immediate values has not been implemented yet.
4121 */
4122DECL_HIDDEN_THROW(uint8_t)
4123iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4124{
4125 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4126 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4127 return idxReg;
4128}
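
/*
 * Usage sketch (hypothetical emit helper, illustrative only): temporary registers are typically
 * held only for the duration of a single emit helper and then released again via
 * iemNativeRegFreeTmp / iemNativeRegFreeTmpImm.
 *
 *     uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
 *     off = <emit code using idxRegTmp>;
 *     iemNativeRegFreeTmp(pReNative, idxRegTmp);
 */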
4129
4130#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4131
4132# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4133/**
4134 * Helper for iemNativeLivenessGetStateByGstReg.
4135 *
4136 * @returns IEMLIVENESS_STATE_XXX
4137 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4138 * ORed together.
4139 */
4140DECL_FORCE_INLINE(uint32_t)
4141iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4142{
4143 /* INPUT trumps anything else. */
4144 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4145 return IEMLIVENESS_STATE_INPUT;
4146
4147 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4148 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4149 {
4150 /* If not all sub-fields are clobbered they must be considered INPUT. */
4151 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4152 return IEMLIVENESS_STATE_INPUT;
4153 return IEMLIVENESS_STATE_CLOBBERED;
4154 }
4155
4156 /* XCPT_OR_CALL trumps UNUSED. */
4157 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4158 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4159
4160 return IEMLIVENESS_STATE_UNUSED;
4161}
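
/*
 * Worked example (illustrative): if CF is in the INPUT state while the other EFLAGS sub-fields
 * are CLOBBERED, the merged expansion has both the INPUT and CLOBBERED bits set and the
 * function above returns IEMLIVENESS_STATE_INPUT, i.e. EFLAGS as a whole must still be
 * treated as an input.
 */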
4162# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4163
4164
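/**
 * Gets the liveness state of guest register/flag @a enmGstRegEx by gathering its bits from the
 * packed per-bit bitmaps (Bit0/Bit1, plus Bit2/Bit3 in the extended layout).
 */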
4165DECL_FORCE_INLINE(uint32_t)
4166iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4167{
4168# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4169 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4170 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4171# else
4172 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4173 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4174 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4175 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4176# endif
4177}
4178
4179
4180DECL_FORCE_INLINE(uint32_t)
4181iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4182{
4183 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4184 if (enmGstReg == kIemNativeGstReg_EFlags)
4185 {
4186 /* Merge the eflags states to one. */
4187# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4188 uRet = RT_BIT_32(uRet);
4189 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4190 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4191 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4192 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4193 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4194 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4195 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4196# else
4197 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4198 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4199 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4200 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4201 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4202 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4203 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4204# endif
4205 }
4206 return uRet;
4207}
4208
4209
4210# ifdef VBOX_STRICT
4211/** For assertions only; the caller must check that idxCurCall isn't zero. */
4212DECL_FORCE_INLINE(uint32_t)
4213iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4214{
4215 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4216}
4217# endif /* VBOX_STRICT */
4218
4219#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4220
4221/**
4222 * Marks host register @a idxHstReg as containing a shadow copy of guest
4223 * register @a enmGstReg.
4224 *
4225 * ASSUMES that the caller has made sure @a enmGstReg is not associated with any
4226 * host register before calling.
4227 */
4228DECL_FORCE_INLINE(void)
4229iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4230{
4231 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4232 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4233 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4234
4235 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4236 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4237 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4238 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4239#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4240 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4241 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4242#else
4243 RT_NOREF(off);
4244#endif
4245}
4246
4247
4248/**
4249 * Clear any guest register shadow claims from @a idxHstReg.
4250 *
4251 * The register does not need to be shadowing any guest registers.
4252 */
4253DECL_FORCE_INLINE(void)
4254iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4255{
4256 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4257 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4258 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4259 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4260 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4261
4262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4263 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4264 if (fGstRegs)
4265 {
4266 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4267 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4268 while (fGstRegs)
4269 {
4270 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4271 fGstRegs &= ~RT_BIT_64(iGstReg);
4272 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4273 }
4274 }
4275#else
4276 RT_NOREF(off);
4277#endif
4278
4279 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4281 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4282}
4283
4284
4285/**
4286 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4287 * and global overview flags.
4288 */
4289DECL_FORCE_INLINE(void)
4290iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4291{
4292 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4293 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4294 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4295 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4296 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4297 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4298 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4299
4300#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4301 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4302 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4303#else
4304 RT_NOREF(off);
4305#endif
4306
4307 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4308 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4309 if (!fGstRegShadowsNew)
4310 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4311 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4312}
4313
4314
4315#if 0 /* unused */
4316/**
4317 * Clear any guest register shadow claim for @a enmGstReg.
4318 */
4319DECL_FORCE_INLINE(void)
4320iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4321{
4322 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4323 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4324 {
4325 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4326 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4327 }
4328}
4329#endif
4330
4331
4332/**
4333 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4334 * as the new shadow of it.
4335 *
4336 * Unlike the other guest reg shadow helpers, this does the logging for you.
4337 * However, the liveness state is not asserted here; the caller must do
4338 * that.
4339 */
4340DECL_FORCE_INLINE(void)
4341iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4342 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4343{
4344 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4345 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4346 {
4347 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4348 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4349 if (idxHstRegOld == idxHstRegNew)
4350 return;
4351 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4352 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4353 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4354 }
4355 else
4356 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4357 g_aGstShadowInfo[enmGstReg].pszName));
4358 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4359}
4360
4361
4362/**
4363 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4364 * to @a idxRegTo.
4365 */
4366DECL_FORCE_INLINE(void)
4367iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4368 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4369{
4370 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4371 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4372 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4373 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4374 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4375 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4376 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4377 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4378 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4379
4380 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4381 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4382 if (!fGstRegShadowsFrom)
4383 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4384 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4385 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4386 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4387#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4388 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4389 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4390#else
4391 RT_NOREF(off);
4392#endif
4393}
4394
4395
4396/**
4397 * Allocates a temporary host general purpose register for keeping a guest
4398 * register value.
4399 *
4400 * Since we may already have a register holding the guest register value,
4401 * code will be emitted to do the loading if that's not the case. Code may also
4402 * be emitted if we have to free up a register to satisfy the request.
4403 *
4404 * @returns The host register number; throws VBox status code on failure, so no
4405 * need to check the return value.
4406 * @param pReNative The native recompile state.
4407 * @param poff Pointer to the variable with the code buffer
4408 *                      position. This will be updated if we need to move a
4409 * variable from register to stack in order to satisfy
4410 * the request.
4411 * @param enmGstReg The guest register that is to be updated.
4412 * @param enmIntendedUse How the caller will be using the host register.
4413 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4414 * register is okay (default). The ASSUMPTION here is
4415 * that the caller has already flushed all volatile
4416 * registers, so this is only applied if we allocate a
4417 * new register.
4418 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4419 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4420 */
4421DECL_HIDDEN_THROW(uint8_t)
4422iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4423 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4424 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4425{
4426 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4427#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4428 AssertMsg( fSkipLivenessAssert
4429 || pReNative->idxCurCall == 0
4430 || enmGstReg == kIemNativeGstReg_Pc
4431 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4432 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4433 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4434 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4435 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4436 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4437#endif
4438 RT_NOREF(fSkipLivenessAssert);
4439#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4440 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4441#endif
4442 uint32_t const fRegMask = !fNoVolatileRegs
4443 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4444 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4445
4446 /*
4447 * First check if the guest register value is already in a host register.
4448 */
4449 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4450 {
4451 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4452 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4453 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4454 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4455
4456 /* It's not supposed to be allocated... */
4457 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4458 {
4459 /*
4460 * If the register will trash the guest shadow copy, try find a
4461 * completely unused register we can use instead. If that fails,
4462 * we need to disassociate the host reg from the guest reg.
4463 */
4464 /** @todo would be nice to know if preserving the register is in any way helpful. */
4465 /* If the purpose is calculations, try to duplicate the register value as
4466 we'll be clobbering the shadow. */
4467 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4468 && ( ~pReNative->Core.bmHstRegs
4469 & ~pReNative->Core.bmHstRegsWithGstShadow
4470 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4471 {
4472 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4473
4474 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4475
4476 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4477 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4478 g_apszIemNativeHstRegNames[idxRegNew]));
4479 idxReg = idxRegNew;
4480 }
4481 /* If the current register matches the restrictions, go ahead and allocate
4482 it for the caller. */
4483 else if (fRegMask & RT_BIT_32(idxReg))
4484 {
4485 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4486 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4487 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4488 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4489 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4490 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4491 else
4492 {
4493 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4494 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4495 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4496 }
4497 }
4498 /* Otherwise, allocate a register that satisfies the caller and transfer
4499 the shadowing if compatible with the intended use. (This basically
4500 means the call wants a non-volatile register (RSP push/pop scenario).) */
4501 else
4502 {
4503 Assert(fNoVolatileRegs);
4504 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4505 !fNoVolatileRegs
4506 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4507 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4508 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4509 {
4510 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4511                 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4512 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4513 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4514 }
4515 else
4516 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4517 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4518 g_apszIemNativeHstRegNames[idxRegNew]));
4519 idxReg = idxRegNew;
4520 }
4521 }
4522 else
4523 {
4524 /*
4525 * Oops. Shadowed guest register already allocated!
4526 *
4527 * Allocate a new register, copy the value and, if updating, the
4528 * guest shadow copy assignment to the new register.
4529 */
4530 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4531 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4532 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4533 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4534
4535 /** @todo share register for readonly access. */
4536 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4537 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4538
4539 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4540 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4541
4542 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4543 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4544 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4545 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4546 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4547 else
4548 {
4549 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4550 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4551 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4552 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4553 }
4554 idxReg = idxRegNew;
4555 }
4556 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4557
4558#ifdef VBOX_STRICT
4559 /* Strict builds: Check that the value is correct. */
4560 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4561#endif
4562
4563 return idxReg;
4564 }
4565
4566 /*
4567 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4568 */
4569 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4570
4571 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4572 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4573
4574 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4575 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4576 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4577 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4578
4579 return idxRegNew;
4580}
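
/**
 * Illustrative usage sketch, not part of the original source: the typical
 * allocate / modify / write back / free sequence for a guest register, using
 * only helpers declared in this file.  It mirrors the pattern used by
 * iemNativeEmitPcWriteback() further down; the sketch function name and the
 * constant added to the PC are made up for the example.
 */
#if 0
static uint32_t iemNativeUsageSketchAdvancePc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Get a host register shadowing the guest PC; a load is only emitted if no shadow exists. */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
                                                             kIemNativeGstRegUse_ForUpdate);

    /* Modify the value and store it back into the guest context. */
    off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, 2 /* example instruction length */);
    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));

    /* Free the temporary register but keep the (now up to date) shadow association. */
    iemNativeRegFreeTmp(pReNative, idxPcReg);
    return off;
}
#endif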
4581
4582
4583/**
4584 * Allocates a temporary host general purpose register that already holds the
4585 * given guest register value.
4586 *
4587 * The use case for this function is places where the shadowing state cannot be
4588 * modified due to branching and such. This will fail if we don't have a
4589 * current shadow copy handy or if it's incompatible. The only code that will
4590 * be emitted here is value checking code in strict builds.
4591 *
4592 * The intended use can only be readonly!
4593 *
4594 * @returns The host register number, UINT8_MAX if not present.
4595 * @param pReNative The native recompile state.
4596 * @param poff Pointer to the instruction buffer offset.
4597 * Will be updated in strict builds if a register is
4598 * found.
4599 * @param enmGstReg The guest register that is to be updated.
4600 * @note In strict builds, this may throw instruction buffer growth failures.
4601 * Non-strict builds will not throw anything.
4602 * @sa iemNativeRegAllocTmpForGuestReg
4603 */
4604DECL_HIDDEN_THROW(uint8_t)
4605iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4606{
4607 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4608#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4609 AssertMsg( pReNative->idxCurCall == 0
4610 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4611 || enmGstReg == kIemNativeGstReg_Pc,
4612 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4613#endif
4614
4615 /*
4616 * First check if the guest register value is already in a host register.
4617 */
4618 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4619 {
4620 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4621 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4622 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4623 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4624
4625 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4626 {
4627 /*
4628 * We only do readonly use here, so easy compared to the other
4629 * variant of this code.
4630 */
4631 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4632 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4633 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4634 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4635 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4636
4637#ifdef VBOX_STRICT
4638 /* Strict builds: Check that the value is correct. */
4639 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4640#else
4641 RT_NOREF(poff);
4642#endif
4643 return idxReg;
4644 }
4645 }
4646
4647 return UINT8_MAX;
4648}
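
/**
 * Illustrative usage sketch, not part of the original source: the fall-back
 * pattern iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() is meant for.
 * When no shadow copy is handy the caller takes a slower path instead of
 * disturbing the shadowing state; the sketch function name is made up and the
 * slow path body is elided.
 */
#if 0
static uint32_t iemNativeUsageSketchPeekAtPc(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* Read-only use of the existing shadow copy; only strict builds emit
           (value checking) code here and advance 'off'. */
        /* ... use idxPcReg ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    else
    {
        /* No usable shadow copy; read the value from CPUMCTX on a slower path. */
    }
    return off;
}
#endif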
4649
4650
4651/**
4652 * Allocates argument registers for a function call.
4653 *
4654 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4655 * need to check the return value.
4656 * @param pReNative The native recompile state.
4657 * @param off The current code buffer offset.
4658 * @param cArgs The number of arguments the function call takes.
4659 */
4660DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4661{
4662 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4663 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4664 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4665 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4666
4667 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4668 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4669 else if (cArgs == 0)
4670         return off;
4671
4672 /*
4673 * Do we get lucky and all registers are free and not shadowing anything?
4674 */
4675 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4676 for (uint32_t i = 0; i < cArgs; i++)
4677 {
4678 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4679 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4680 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4681 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4682 }
4683 /*
4684 * Okay, not lucky so we have to free up the registers.
4685 */
4686 else
4687 for (uint32_t i = 0; i < cArgs; i++)
4688 {
4689 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4690 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4691 {
4692 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4693 {
4694 case kIemNativeWhat_Var:
4695 {
4696 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4698 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4699 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4700 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4701
4702 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4703 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4704 else
4705 {
4706 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4707 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4708 }
4709 break;
4710 }
4711
4712 case kIemNativeWhat_Tmp:
4713 case kIemNativeWhat_Arg:
4714 case kIemNativeWhat_rc:
4715 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4716 default:
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4718 }
4719
4720 }
4721 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4722 {
4723 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4724 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4725 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4726 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4727 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4728 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4729 }
4730 else
4731 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4732 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4733 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4734 }
4735 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4736 return off;
4737}
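
/**
 * Illustrative usage sketch, not part of the original source: reserving the
 * first two argument registers ahead of a helper call, capturing the code
 * buffer offset as per the documented return value.  The registers themselves
 * are g_aidxIemNativeCallRegs[0] and g_aidxIemNativeCallRegs[1]; loading them
 * and emitting the actual call is elided, and the sketch function name is
 * made up.
 */
#if 0
static uint32_t iemNativeUsageSketchTwoArgCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeRegAllocArgs(pReNative, off, 2 /*cArgs*/);
    /* ... load g_aidxIemNativeCallRegs[0..1] and emit the helper call ... */
    return off;
}
#endif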
4738
4739
4740DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4741
4742
4743#if 0
4744/**
4745 * Frees a register assignment of any type.
4746 *
4747 * @param pReNative The native recompile state.
4748 * @param idxHstReg The register to free.
4749 *
4750 * @note Does not update variables.
4751 */
4752DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4753{
4754 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4755 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4756 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4757 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4758 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4759 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4760 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4761 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4762 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4763 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4764 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4765 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4766 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4767 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4768
4769 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4770 /* no flushing, right:
4771 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4772 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4773 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4774 */
4775}
4776#endif
4777
4778
4779/**
4780 * Frees a temporary register.
4781 *
4782 * Any shadow copies of guest registers assigned to the host register will not
4783 * be flushed by this operation.
4784 */
4785DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4786{
4787 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4788 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4789 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4790 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4791 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4792}
4793
4794
4795/**
4796 * Frees a temporary immediate register.
4797 *
4798 * It is assumed that the caller has not modified the register, so it still holds
4799 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4800 */
4801DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4802{
4803 iemNativeRegFreeTmp(pReNative, idxHstReg);
4804}
4805
4806
4807/**
4808 * Frees a register assigned to a variable.
4809 *
4810 * The register will be disassociated from the variable.
4811 */
4812DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4813{
4814 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4815 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4816 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4817 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4818 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4819
4820 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4821 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4822 if (!fFlushShadows)
4823 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4824 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4825 else
4826 {
4827 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4828 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4829 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4830 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4831 uint64_t fGstRegShadows = fGstRegShadowsOld;
4832 while (fGstRegShadows)
4833 {
4834 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4835 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4836
4837 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4838 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4839 }
4840 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4841 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4842 }
4843}
4844
4845
4846/**
4847 * Called right before emitting a call instruction to move anything important
4848 * out of call-volatile registers, free and flush the call-volatile registers,
4849 * optionally freeing argument variables.
4850 *
4851 * @returns New code buffer offset, UINT32_MAX on failure.
4852 * @param pReNative The native recompile state.
4853 * @param off The code buffer offset.
4854 * @param cArgs The number of arguments the function call takes.
4855 *                      It is presumed that the host register part of these has
4856 *                      already been allocated as such and won't need moving,
4857 * just freeing.
4858 * @param fKeepVars Mask of variables that should keep their register
4859 * assignments. Caller must take care to handle these.
4860 */
4861DECL_HIDDEN_THROW(uint32_t)
4862iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4863{
4864 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4865
4866 /* fKeepVars will reduce this mask. */
4867 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4868
4869 /*
4870 * Move anything important out of volatile registers.
4871 */
4872 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4873 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4874 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4875#ifdef IEMNATIVE_REG_FIXED_TMP0
4876 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4877#endif
4878#ifdef IEMNATIVE_REG_FIXED_TMP1
4879 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4880#endif
4881#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4882 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4883#endif
4884 & ~g_afIemNativeCallRegs[cArgs];
4885
4886 fRegsToMove &= pReNative->Core.bmHstRegs;
4887 if (!fRegsToMove)
4888 { /* likely */ }
4889 else
4890 {
4891 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4892 while (fRegsToMove != 0)
4893 {
4894 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4895 fRegsToMove &= ~RT_BIT_32(idxReg);
4896
4897 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4898 {
4899 case kIemNativeWhat_Var:
4900 {
4901 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4902 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4903 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4904 Assert(pVar->idxReg == idxReg);
4905 if (!(RT_BIT_32(idxVar) & fKeepVars))
4906 {
4907 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4908 idxVar, pVar->enmKind, pVar->idxReg));
4909 if (pVar->enmKind != kIemNativeVarKind_Stack)
4910 pVar->idxReg = UINT8_MAX;
4911 else
4912 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4913 }
4914 else
4915 fRegsToFree &= ~RT_BIT_32(idxReg);
4916 continue;
4917 }
4918
4919 case kIemNativeWhat_Arg:
4920 AssertMsgFailed(("What?!?: %u\n", idxReg));
4921 continue;
4922
4923 case kIemNativeWhat_rc:
4924 case kIemNativeWhat_Tmp:
4925 AssertMsgFailed(("Missing free: %u\n", idxReg));
4926 continue;
4927
4928 case kIemNativeWhat_FixedTmp:
4929 case kIemNativeWhat_pVCpuFixed:
4930 case kIemNativeWhat_pCtxFixed:
4931 case kIemNativeWhat_PcShadow:
4932 case kIemNativeWhat_FixedReserved:
4933 case kIemNativeWhat_Invalid:
4934 case kIemNativeWhat_End:
4935 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4936 }
4937 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4938 }
4939 }
4940
4941 /*
4942 * Do the actual freeing.
4943 */
4944 if (pReNative->Core.bmHstRegs & fRegsToFree)
4945 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4946 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4947 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4948
4949 /* If there are guest register shadows in any call-volatile register, we
4950        have to clear the corresponding guest register masks for each register. */
4951 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4952 if (fHstRegsWithGstShadow)
4953 {
4954 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4955 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4956 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4957 do
4958 {
4959 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4960 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4961
4962 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4963 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4964 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4965 } while (fHstRegsWithGstShadow != 0);
4966 }
4967
4968 return off;
4969}
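
/**
 * Illustrative usage sketch, not part of the original source: the pre-call
 * flush.  Right before emitting a call everything living in call-volatile
 * registers is moved out or spilled and the registers are freed; the call
 * emission itself is elided and the sketch function name is made up.
 */
#if 0
static uint32_t iemNativeUsageSketchPreCallFlush(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 3 /*cArgs*/);
    /* ... load the argument registers and emit the call ... */
    return off;
}
#endif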
4970
4971
4972/**
4973 * Flushes a set of guest register shadow copies.
4974 *
4975 * This is usually done after calling a threaded function or a C-implementation
4976 * of an instruction.
4977 *
4978 * @param pReNative The native recompile state.
4979 * @param fGstRegs Set of guest registers to flush.
4980 */
4981DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4982{
4983 /*
4984 * Reduce the mask by what's currently shadowed
4985 */
4986 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4987 fGstRegs &= bmGstRegShadowsOld;
4988 if (fGstRegs)
4989 {
4990 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4991 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4992 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4993 if (bmGstRegShadowsNew)
4994 {
4995 /*
4996 * Partial.
4997 */
4998 do
4999 {
5000 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5001 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5002 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5003 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5004 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5005
5006 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5007 fGstRegs &= ~fInThisHstReg;
5008 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5010 if (!fGstRegShadowsNew)
5011 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5012 } while (fGstRegs != 0);
5013 }
5014 else
5015 {
5016 /*
5017 * Clear all.
5018 */
5019 do
5020 {
5021 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5022 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5023 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5024 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5025 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5026
5027 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5028 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5029 } while (fGstRegs != 0);
5030 pReNative->Core.bmHstRegsWithGstShadow = 0;
5031 }
5032 }
5033}
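
/**
 * Illustrative usage sketch, not part of the original source (call fragment;
 * pReNative is assumed to be in scope): dropping the shadow copies of two
 * guest registers that a C helper may have modified behind the register
 * allocator's back.
 */
#if 0
iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc) | RT_BIT_64(kIemNativeGstReg_EFlags));
#endif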
5034
5035
5036/**
5037 * Flushes guest register shadow copies held by a set of host registers.
5038 *
5039 * This is used with the TLB lookup code for ensuring that we don't carry on
5040 * with any guest shadows in volatile registers, as these will get corrupted by
5041 * a TLB miss.
5042 *
5043 * @param pReNative The native recompile state.
5044 * @param fHstRegs Set of host registers to flush guest shadows for.
5045 */
5046DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5047{
5048 /*
5049 * Reduce the mask by what's currently shadowed.
5050 */
5051 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5052 fHstRegs &= bmHstRegsWithGstShadowOld;
5053 if (fHstRegs)
5054 {
5055 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5056 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5057 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5058 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5059 if (bmHstRegsWithGstShadowNew)
5060 {
5061 /*
5062 * Partial (likely).
5063 */
5064 uint64_t fGstShadows = 0;
5065 do
5066 {
5067 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5068 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5069 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5070 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5071
5072 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5073 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5074 fHstRegs &= ~RT_BIT_32(idxHstReg);
5075 } while (fHstRegs != 0);
5076 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5077 }
5078 else
5079 {
5080 /*
5081 * Clear all.
5082 */
5083 do
5084 {
5085 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5086 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5087 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5088 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5089
5090 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5091 fHstRegs &= ~RT_BIT_32(idxHstReg);
5092 } while (fHstRegs != 0);
5093 pReNative->Core.bmGstRegShadows = 0;
5094 }
5095 }
5096}
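
/**
 * Illustrative usage sketch, not part of the original source (call fragment;
 * pReNative is assumed to be in scope): before an inline TLB lookup, drop any
 * guest shadows living in call-volatile host registers, since a TLB miss
 * helper call would clobber them anyway.
 */
#if 0
iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif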
5097
5098
5099/**
5100 * Restores guest shadow copies in volatile registers.
5101 *
5102 * This is used after calling a helper function (think TLB miss) to restore the
5103 * register state of volatile registers.
5104 *
5105 * @param pReNative The native recompile state.
5106 * @param off The code buffer offset.
5107 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5108 * be active (allocated) w/o asserting. Hack.
5109 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5110 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5111 */
5112DECL_HIDDEN_THROW(uint32_t)
5113iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5114{
5115 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5116 if (fHstRegs)
5117 {
5118 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5119 do
5120 {
5121 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5122
5123            /* It's not fatal if a register is active holding a variable that is
5124               shadowing a guest register, ASSUMING all pending guest register
5125               writes were flushed prior to the helper call. However, we'll be
5126               emitting duplicate restores, so it wastes code space. */
5127 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5128 RT_NOREF(fHstRegsActiveShadows);
5129
5130 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5131 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5132 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5133 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5134
5135 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5136 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5137
5138 fHstRegs &= ~RT_BIT_32(idxHstReg);
5139 } while (fHstRegs != 0);
5140 }
5141 return off;
5142}
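
/**
 * Illustrative usage sketch, not part of the original source (call fragment;
 * pReNative and off are assumed to be in scope): after a TLB miss (or similar)
 * helper returns, the call-volatile host registers no longer hold valid
 * values, so every guest shadow still recorded for them is reloaded before the
 * generated code continues.
 */
#if 0
off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
#endif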
5143
5144
5145#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5146# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5147static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5148{
5149 /* Compare the shadow with the context value, they should match. */
5150 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5151 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5152 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5153 return off;
5154}
5155# endif
5156
5157/**
5158 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5159 */
5160static uint32_t
5161iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5162{
5163 if (pReNative->Core.offPc)
5164 {
5165# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5166 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5167 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5168# endif
5169
5170# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5171 /* Allocate a temporary PC register. */
5172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5173
5174 /* Perform the addition and store the result. */
5175 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5176 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5177
5178 /* Free but don't flush the PC register. */
5179 iemNativeRegFreeTmp(pReNative, idxPcReg);
5180# else
5181 /* Compare the shadow with the context value, they should match. */
5182 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5183 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5184# endif
5185
5186 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5187 pReNative->Core.offPc = 0;
5188 pReNative->Core.cInstrPcUpdateSkipped = 0;
5189 }
5190# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5191 else
5192 {
5193 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5194 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5195 }
5196# endif
5197
5198 return off;
5199}
5200#endif
5201
5202
5203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5204
5205
5206/*********************************************************************************************************************************
5207* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5208*********************************************************************************************************************************/
5209
5210/**
5211 * Info about shadowed guest SIMD register values.
5212 * @see IEMNATIVEGSTSIMDREG
5213 */
5214static struct
5215{
5216 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5217 uint32_t offXmm;
5218 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5219 uint32_t offYmm;
5220 /** Name (for logging). */
5221 const char *pszName;
5222} const g_aGstSimdShadowInfo[] =
5223{
5224#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5225 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5226 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5227 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5228 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5229 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5230 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5231 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5232 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5233 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5234 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5235 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5236 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5237 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5238 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5239 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5240 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5241 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5242#undef CPUMCTX_OFF_AND_SIZE
5243};
5244AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5245
5246
5247#ifdef LOG_ENABLED
5248/** Host CPU SIMD register names. */
5249DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5250{
5251#ifdef RT_ARCH_AMD64
5252 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5253#elif defined(RT_ARCH_ARM64)
5254 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5255 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5256#else
5257# error "port me"
5258#endif
5259};
5260#endif
5261
5262
5263DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
5264 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
5265{
5266 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5267
5268 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = enmWhat;
5269 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5270 RT_NOREF(idxVar);
5271 return idxSimdReg;
5272}
5273
5274
5275/**
5276 * Frees a temporary SIMD register.
5277 *
5278 * Any shadow copies of guest registers assigned to the host register will not
5279 * be flushed by this operation.
5280 */
5281DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5282{
5283 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5284 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5285 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5286 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5287 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5288}
5289
5290
5291/**
5292 * Locate a register, possibly freeing one up.
5293 *
5294 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5295 * failed.
5296 *
5297 * @returns Host register number on success. Returns UINT8_MAX if no registers
5298 * found, the caller is supposed to deal with this and raise an
5299 * allocation type specific status code (if desired).
5300 *
5301 * @throws VBox status code if we run into trouble spilling a variable or
5302 * recording debug info. Does NOT throw anything if we're out of
5303 * registers, though.
5304 */
5305static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5306 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5307{
5308 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5309 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5310 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5311
5312 AssertFailed();
5313
5314 /*
5315 * Try a freed register that's shadowing a guest register.
5316 */
5317 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5318 if (fRegs)
5319 {
5320 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5321
5322#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5323 /*
5324 * When we have liveness information, we use it to kick out all shadowed
5325 * guest registers that will not be needed any more in this TB. If we're
5326 * lucky, this may prevent us from ending up here again.
5327 *
5328 * Note! We must consider the previous entry here so we don't free
5329 * anything that the current threaded function requires (current
5330 * entry is produced by the next threaded function).
5331 */
5332 uint32_t const idxCurCall = pReNative->idxCurCall;
5333 if (idxCurCall > 0)
5334 {
5335 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5336
5337# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5338 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5339 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5340 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5341#else
5342 /* Construct a mask of the registers not in the read or write state.
5343           Note! We could skip writes, if they aren't from us, as this is just
5344 a hack to prevent trashing registers that have just been written
5345 or will be written when we retire the current instruction. */
5346 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5347 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5348 & IEMLIVENESSBIT_MASK;
5349#endif
5350 /* Merge EFLAGS. */
5351 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
5352 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
5353 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
5354 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
5355 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
5356
5357 /* If it matches any shadowed registers. */
5358 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5359 {
5360 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5361 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5362 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5363
5364 /* See if we've got any unshadowed registers we can return now. */
5365 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5366 if (fUnshadowedRegs)
5367 {
5368 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5369 return (fPreferVolatile
5370 ? ASMBitFirstSetU32(fUnshadowedRegs)
5371 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5372 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5373 - 1;
5374 }
5375 }
5376 }
5377#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5378
5379 unsigned const idxReg = (fPreferVolatile
5380 ? ASMBitFirstSetU32(fRegs)
5381 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5382 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5383 - 1;
5384
5385 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5386 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5387 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5388 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5389 Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5390
5391 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5392 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5393 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5394 return idxReg;
5395 }
5396
5397 /*
5398 * Try free up a variable that's in a register.
5399 *
5400 * We do two rounds here: first we evacuate variables that don't need to be
5401 * saved on the stack, then in the second round we move things to the stack.
5402 */
5403 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5404 AssertReleaseFailed(); /** @todo */
5405#if 0
5406 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5407 {
5408 uint32_t fVars = pReNative->Core.bmSimdVars;
5409 while (fVars)
5410 {
5411 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5412 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5413 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5414 && (RT_BIT_32(idxReg) & fRegMask)
5415 && ( iLoop == 0
5416 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5417 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5418 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5419 {
5420 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5421 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5422 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5423 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5424 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5425 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5426
5427 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5428 {
5429 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5430 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5431 }
5432
5433 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5434 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5435
5436 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5437 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5438 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5439 return idxReg;
5440 }
5441 fVars &= ~RT_BIT_32(idxVar);
5442 }
5443 }
5444#else
5445 RT_NOREF(poff);
5446#endif
5447
5448 return UINT8_MAX;
5449}
5450
5451
5452/**
5453 * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
5454 * SIMD register @a enmGstSimdReg.
5455 *
5456 * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any
5457 * host register before calling.
5458 */
5459DECL_FORCE_INLINE(void)
5460iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5461{
5462 Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
5463 Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5464 Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
5465
5466 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxHstSimdReg;
5467 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5468 pReNative->Core.bmGstSimdRegShadows |= RT_BIT_64(enmGstSimdReg);
5469 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxHstSimdReg);
5470#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5471 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5472 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
5473#else
5474 RT_NOREF(off);
5475#endif
5476}
5477
5478
5479/**
5480 * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
5481 * to @a idxSimdRegTo.
5482 */
5483DECL_FORCE_INLINE(void)
5484iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
5485 IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5486{
5487 Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5488 Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
5489 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
5490 == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
5491 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5492 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
5493 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
5494 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
5495 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
5496 Assert( pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
5497 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
5498
5499
5500 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
5501 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows = fGstRegShadowsFrom;
5502 if (!fGstRegShadowsFrom)
5503 {
5504 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdRegFrom);
5505 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5506 }
5507 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxSimdRegTo);
5508 pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5509 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxSimdRegTo;
5510#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5511 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5512 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
5513#else
5514 RT_NOREF(off);
5515#endif
5516}
5517
5518
5519/**
5520 * Clear any guest register shadow claims from @a idxHstSimdReg.
5521 *
5522 * The register does not need to be shadowing any guest registers.
5523 */
5524DECL_FORCE_INLINE(void)
5525iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
5526{
5527 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5528 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
5529 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5530 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
5531 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5532 Assert( !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
5533 && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
5534
5535#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5536 uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5537 if (fGstRegs)
5538 {
5539 Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
5540 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5541 while (fGstRegs)
5542 {
5543 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5544 fGstRegs &= ~RT_BIT_64(iGstReg);
5545 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
5546 }
5547 }
5548#else
5549 RT_NOREF(off);
5550#endif
5551
5552 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstSimdReg);
5553 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5554 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5555 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5556}
5557
5558
5559/**
5560 * Flushes a set of guest register shadow copies.
5561 *
5562 * This is usually done after calling a threaded function or a C-implementation
5563 * of an instruction.
5564 *
5565 * @param pReNative The native recompile state.
5566 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5567 */
5568DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5569{
5570 /*
5571 * Reduce the mask by what's currently shadowed
5572 */
5573 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5574 fGstSimdRegs &= bmGstSimdRegShadows;
5575 if (fGstSimdRegs)
5576 {
5577 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5578 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5579 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5580 if (bmGstSimdRegShadowsNew)
5581 {
5582 /*
5583 * Partial.
5584 */
5585 do
5586 {
5587 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5588 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5589 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5590 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5591 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5592 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5593
5594 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5595 fGstSimdRegs &= ~fInThisHstReg;
5596 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5597 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5598 if (!fGstRegShadowsNew)
5599 {
5600 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5601 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5602 }
5603 } while (fGstSimdRegs != 0);
5604 }
5605 else
5606 {
5607 /*
5608 * Clear all.
5609 */
5610 do
5611 {
5612 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5613 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5614 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5615 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5616 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5617 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5618
5619 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5620 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5621 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5622 } while (fGstSimdRegs != 0);
5623 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5624 }
5625 }
5626}
5627
5628
5629/**
5630 * Allocates a temporary host SIMD register.
5631 *
5632 * This may emit code to save register content onto the stack in order to free
5633 * up a register.
5634 *
5635 * @returns The host register number; throws VBox status code on failure,
5636 * so no need to check the return value.
5637 * @param pReNative The native recompile state.
5638 * @param poff Pointer to the variable with the code buffer position.
5639 *                      This will be updated if we need to move a variable from
5640 * register to stack in order to satisfy the request.
5641 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5642 * registers (@c true, default) or the other way around
5643 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5644 */
5645DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5646{
5647 /*
5648 * Try find a completely unused register, preferably a call-volatile one.
5649 */
5650 uint8_t idxSimdReg;
5651    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5652                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5653 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5654 if (fRegs)
5655 {
5656 if (fPreferVolatile)
5657 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5658 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5659 else
5660 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5661 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5662 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5663 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5664 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5665 }
5666 else
5667 {
5668 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5669 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5670 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5671 }
5672
5673 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5674 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5675}
5676
5677
5678/**
5679 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5680 * registers.
5681 *
5682 * @returns The host register number; throws VBox status code on failure,
5683 * so no need to check the return value.
5684 * @param pReNative The native recompile state.
5685 * @param poff Pointer to the variable with the code buffer position.
5686  *                          This will be updated if we need to move a variable from
5687 * register to stack in order to satisfy the request.
5688 * @param fRegMask Mask of acceptable registers.
5689 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5690 * registers (@c true, default) or the other way around
5691 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5692 */
5693DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5694 bool fPreferVolatile /*= true*/)
5695{
5696 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5697 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5698
5699 /*
5700 * Try find a completely unused register, preferably a call-volatile one.
5701 */
5702 uint8_t idxSimdReg;
5703 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5704 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5705 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5706 & fRegMask;
5707 if (fRegs)
5708 {
5709 if (fPreferVolatile)
5710 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5711 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5712 else
5713 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5714 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5715 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5716 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5717 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5718 }
5719 else
5720 {
5721 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5722 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5723 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5724 }
5725
5726 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5727 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5728}
5729
5730
5731static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5732 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5733{
5734 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5735     if (   pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5736         || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5737 {
5738# ifdef RT_ARCH_ARM64
5739 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5740 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5741# endif
5742
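        /* Copy only the part(s) the destination is requested to load; enmLoaded is updated below. */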
5743 switch (enmLoadSzDst)
5744 {
5745             case kIemNativeGstSimdRegLdStSz_256:
5746                 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
                     break;
5747             case kIemNativeGstSimdRegLdStSz_Low128:
5748                 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
                     break;
5749             case kIemNativeGstSimdRegLdStSz_High128:
5750                 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
                     break;
5751             default:
5752                 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5753 }
5754
5755 pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded = enmLoadSzDst;
5756 return off;
5757 }
5758 else
5759 {
5760 /* Complicated stuff where the source is currently missing something, later. */
5761 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5762 }
5763
5764 return off;
5765}
5766
5767
5768/**
5769 * Allocates a temporary host SIMD register for keeping a guest
5770 * SIMD register value.
5771 *
5772 * Since we may already have a register holding the guest register value,
5773 * code will be emitted to do the loading if that's not the case. Code may also
5774  * be emitted if we have to free up a register to satisfy the request.
5775 *
5776 * @returns The host register number; throws VBox status code on failure, so no
5777 * need to check the return value.
5778 * @param pReNative The native recompile state.
5779 * @param poff Pointer to the variable with the code buffer
5780  *                          position. This will be updated if we need to move a
5781 * variable from register to stack in order to satisfy
5782 * the request.
5783  * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
      * @param   enmLoadSz       Which part of the register needs to be loaded (the
      *                          low or high 128 bits, or the full 256 bits).
5784 * @param enmIntendedUse How the caller will be using the host register.
5785 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5786 * register is okay (default). The ASSUMPTION here is
5787 * that the caller has already flushed all volatile
5788 * registers, so this is only applied if we allocate a
5789 * new register.
5790 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5791 */
5792DECL_HIDDEN_THROW(uint8_t)
5793iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5794 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5795 bool fNoVolatileRegs /*= false*/)
5796{
5797 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5798#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5799 AssertMsg( pReNative->idxCurCall == 0
5800 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5801 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5802 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5803 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5804 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5805 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5806#endif
5807#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5808 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5809#endif
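    /* The set of host SIMD registers the caller will accept: all allocatable registers,
       minus the call-volatile ones when fNoVolatileRegs is set. */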
5810 uint32_t const fRegMask = !fNoVolatileRegs
5811 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5812 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5813
5814 /*
5815 * First check if the guest register value is already in a host register.
5816 */
5817 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5818 {
5819 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5820 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5821 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5822 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5823
5824 /* It's not supposed to be allocated... */
5825 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5826 {
5827 /*
5828 * If the register will trash the guest shadow copy, try find a
5829 * completely unused register we can use instead. If that fails,
5830 * we need to disassociate the host reg from the guest reg.
5831 */
5832 /** @todo would be nice to know if preserving the register is in any way helpful. */
5833             /* If the purpose is calculations, try to duplicate the register value as
5834 we'll be clobbering the shadow. */
5835 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5836 && ( ~pReNative->Core.bmHstSimdRegs
5837 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5838 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5839 {
5840 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5841
5842 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5843
5844 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5845 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5846 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5847 idxSimdReg = idxRegNew;
5848 }
5849 /* If the current register matches the restrictions, go ahead and allocate
5850 it for the caller. */
5851 else if (fRegMask & RT_BIT_32(idxSimdReg))
5852 {
5853 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5854 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5855 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5856 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5857 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5858 else
5859 {
5860 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5861 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5862 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5863 }
5864 }
5865 /* Otherwise, allocate a register that satisfies the caller and transfer
5866 the shadowing if compatible with the intended use. (This basically
5867 means the call wants a non-volatile register (RSP push/pop scenario).) */
5868 else
5869 {
5870 Assert(fNoVolatileRegs);
5871 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5872 !fNoVolatileRegs
5873 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5874 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5875 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5876 {
5877 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5878                     Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5879 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5880 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5881 }
5882 else
5883 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5884 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5885 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5886 idxSimdReg = idxRegNew;
5887 }
5888 }
5889 else
5890 {
5891 /*
5892 * Oops. Shadowed guest register already allocated!
5893 *
5894 * Allocate a new register, copy the value and, if updating, the
5895 * guest shadow copy assignment to the new register.
5896 */
5897 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5898 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5899 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5900 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5901
5902 /** @todo share register for readonly access. */
5903 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5904 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5905
5906 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5907 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5908 else
5909 {
5910 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
5911 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5912 }
5913
5914 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5915 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5916 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5917 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5918 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5919 else
5920 {
5921 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5922 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5923 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5924 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5925 }
5926 idxSimdReg = idxRegNew;
5927 }
5928 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5929
5930#ifdef VBOX_STRICT
5931 /* Strict builds: Check that the value is correct. */
5932 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5933#endif
5934
5935 return idxSimdReg;
5936 }
5937
5938 /*
5939      * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5940 */
5941 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5942
5943 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5944 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5945 else
5946 {
5947 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
5948 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5949 }
5950
5951 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5952 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5953
5954 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5955 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5956
5957 return idxRegNew;
5958}
5959
5960
5961/**
5962  * Emits code to flush a pending write of the given guest SIMD register (if any) and clears its dirty state.
5963  *
5964  * @returns New code buffer offset.
5965 * @param pReNative The native recompile state.
5966 * @param off Current code buffer position.
5967 * @param idxGstSimdReg The guest SIMD register to flush.
5968 */
5969static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGstSimdReg)
5970{
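    /* ASSUMES the guest SIMD register is currently shadowed by a host register, otherwise
       the aidxGstSimdRegShadows lookup below yields garbage. */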
5971 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5972
5973 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5974 g_aGstSimdShadowInfo[idxGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5975 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg),
5976 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg)));
5977
5978#ifdef RT_ARCH_AMD64
5979# error "Port me"
5980#elif defined(RT_ARCH_ARM64)
5981 /* ASSUMING there are two consecutive host registers to store the potential 256-bit guest register. */
5982 Assert(!(idxHstSimdReg & 0x1));
5983 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg))
5984 {
5985 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5986 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5987 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
5988 }
5989
5990 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))
5991 {
5992 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5993                || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5994 off = iemNativeEmitSimdStoreVecRegToVCpuU128(pReNative, off, idxHstSimdReg + 1, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
5995 }
5996#endif
5997
5998 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, idxGstSimdReg);
5999 return off;
6000}
6001
6002#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6003
6004
6005
6006/*********************************************************************************************************************************
6007* Code emitters for flushing pending guest register writes and sanity checks *
6008*********************************************************************************************************************************/
6009
6010/**
6011 * Flushes delayed write of a specific guest register.
6012 *
6013 * This must be called prior to calling CImpl functions and any helpers that use
6014 * the guest state (like raising exceptions) and such.
6015 *
6016 * This optimization has not yet been implemented. The first target would be
6017 * RIP updates, since these are the most common ones.
6018 */
6019DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6020 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
6021{
6022#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6023 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
6024#endif
6025
6026#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6027 if ( enmClass == kIemNativeGstRegRef_XReg
6028 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
6029 {
6030 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxReg);
6031 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now, that the referenced register doesn't change). */
6032 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
6033
6034 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6035 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
6036 }
6037#endif
6038 RT_NOREF(pReNative, enmClass, idxReg);
6039 return off;
6040}
6041
6042
6043/**
6044 * Flushes any delayed guest register writes.
6045 *
6046 * This must be called prior to calling CImpl functions and any helpers that use
6047 * the guest state (like raising exceptions) and such.
6048 *
6049 * This optimization has not yet been implemented. The first target would be
6050 * RIP updates, since these are the most common ones.
6051 */
6052DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/,
6053 bool fFlushShadows /*= true*/)
6054{
6055#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6056     if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
6057 off = iemNativeEmitPcWriteback(pReNative, off);
6058#else
6059 RT_NOREF(pReNative, fGstShwExcept);
6060#endif
6061
6062#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6063 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6064 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6065 {
6066 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6067 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6068
6069 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6070 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxGstSimdReg);
6071
6072 if ( fFlushShadows
6073 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6074 {
6075 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6076
6077 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6078 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6079 }
6080 }
6081#else
6082 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6083#endif
6084
6085 return off;
6086}
6087
6088
6089#ifdef VBOX_STRICT
6090/**
6091 * Does internal register allocator sanity checks.
6092 */
6093static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6094{
6095 /*
6096 * Iterate host registers building a guest shadowing set.
6097 */
6098 uint64_t bmGstRegShadows = 0;
6099 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6100 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6101 while (bmHstRegsWithGstShadow)
6102 {
6103 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6104 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6105 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6106
6107 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6108 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6109 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6110 bmGstRegShadows |= fThisGstRegShadows;
6111 while (fThisGstRegShadows)
6112 {
6113 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6114 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6115 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6116 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6117 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6118 }
6119 }
6120 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6121 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6122 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6123
6124 /*
6125 * Now the other way around, checking the guest to host index array.
6126 */
6127 bmHstRegsWithGstShadow = 0;
6128 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6129 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6130 while (bmGstRegShadows)
6131 {
6132 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6133 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6134 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6135
6136 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6137 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6138 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6139 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6140 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6141 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6142 }
6143 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6144 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6145 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6146}
6147#endif
6148
6149
6150/*********************************************************************************************************************************
6151* Code Emitters (larger snippets) *
6152*********************************************************************************************************************************/
6153
6154/**
6155 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6156 * extending to 64-bit width.
6157 *
6158  * @returns New code buffer offset; throws VBox status code on failure.
6159  * @param   pReNative   The native recompile state.
6160 * @param off The current code buffer position.
6161 * @param idxHstReg The host register to load the guest register value into.
6162 * @param enmGstReg The guest register to load.
6163 *
6164 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6165 * that is something the caller needs to do if applicable.
6166 */
6167DECL_HIDDEN_THROW(uint32_t)
6168iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6169{
6170 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
6171 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6172
6173 switch (g_aGstShadowInfo[enmGstReg].cb)
6174 {
6175 case sizeof(uint64_t):
6176 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6177 case sizeof(uint32_t):
6178 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6179 case sizeof(uint16_t):
6180 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6181#if 0 /* not present in the table. */
6182 case sizeof(uint8_t):
6183 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6184#endif
6185 default:
6186 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6187 }
6188}
6189
6190
6191#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6192/**
6193 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6194 *
6195  * @returns New code buffer offset; throws VBox status code on failure.
6196 * @param pReNative The recompiler state.
6197 * @param off The current code buffer position.
6198 * @param idxHstSimdReg The host register to load the guest register value into.
6199 * @param enmGstSimdReg The guest register to load.
6200 * @param enmLoadSz The load size of the register.
6201 *
6202  * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6203 * that is something the caller needs to do if applicable.
6204 */
6205DECL_HIDDEN_THROW(uint32_t)
6206iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6207 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6208{
6209 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6210
6211 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
6212 switch (enmLoadSz)
6213 {
6214 case kIemNativeGstSimdRegLdStSz_256:
6215 return iemNativeEmitSimdLoadVecRegFromVCpuU256(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm,
6216 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6217 case kIemNativeGstSimdRegLdStSz_Low128:
6218 return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6219 case kIemNativeGstSimdRegLdStSz_High128:
6220 return iemNativeEmitSimdLoadVecRegFromVCpuU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6221 default:
6222 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6223 }
6224}
6225#endif
6226
6227#ifdef VBOX_STRICT
6228/**
6229 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6230 *
6231 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6232 * Trashes EFLAGS on AMD64.
6233 */
6234static uint32_t
6235iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6236{
6237# ifdef RT_ARCH_AMD64
6238 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6239
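    /* Strategy: rotate the upper 32 bits into the low half, test them and trap (int3) if
       any is set, then rotate back so the original register value is preserved. */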
6240 /* rol reg64, 32 */
6241 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6242 pbCodeBuf[off++] = 0xc1;
6243 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6244 pbCodeBuf[off++] = 32;
6245
6246 /* test reg32, ffffffffh */
6247 if (idxReg >= 8)
6248 pbCodeBuf[off++] = X86_OP_REX_B;
6249 pbCodeBuf[off++] = 0xf7;
6250 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6251 pbCodeBuf[off++] = 0xff;
6252 pbCodeBuf[off++] = 0xff;
6253 pbCodeBuf[off++] = 0xff;
6254 pbCodeBuf[off++] = 0xff;
6255
6256 /* je/jz +1 */
6257 pbCodeBuf[off++] = 0x74;
6258 pbCodeBuf[off++] = 0x01;
6259
6260 /* int3 */
6261 pbCodeBuf[off++] = 0xcc;
6262
6263 /* rol reg64, 32 */
6264 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6265 pbCodeBuf[off++] = 0xc1;
6266 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6267 pbCodeBuf[off++] = 32;
6268
6269# elif defined(RT_ARCH_ARM64)
6270 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6271 /* lsr tmp0, reg64, #32 */
6272 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6273 /* cbz tmp0, +1 */
6274 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6275 /* brk #0x1100 */
6276 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6277
6278# else
6279# error "Port me!"
6280# endif
6281 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6282 return off;
6283}
6284#endif /* VBOX_STRICT */
6285
6286
6287#ifdef VBOX_STRICT
6288/**
6289 * Emitting code that checks that the content of register @a idxReg is the same
6290 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6291 * instruction if that's not the case.
6292 *
6293 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6294 * Trashes EFLAGS on AMD64.
6295 */
6296static uint32_t
6297iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6298{
6299# ifdef RT_ARCH_AMD64
6300 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6301
6302 /* cmp reg, [mem] */
6303 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6304 {
6305 if (idxReg >= 8)
6306 pbCodeBuf[off++] = X86_OP_REX_R;
6307 pbCodeBuf[off++] = 0x38;
6308 }
6309 else
6310 {
6311 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6312 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6313 else
6314 {
6315 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6316 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6317 else
6318 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6319 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6320 if (idxReg >= 8)
6321 pbCodeBuf[off++] = X86_OP_REX_R;
6322 }
6323 pbCodeBuf[off++] = 0x39;
6324 }
6325 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6326
6327 /* je/jz +1 */
6328 pbCodeBuf[off++] = 0x74;
6329 pbCodeBuf[off++] = 0x01;
6330
6331 /* int3 */
6332 pbCodeBuf[off++] = 0xcc;
6333
6334 /* For values smaller than the register size, we must check that the rest
6335 of the register is all zeros. */
6336 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6337 {
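        /* The immediate below is 0xffff0000 for 16-bit fields (0xffffff00 for 8-bit ones),
           i.e. the bits that must be zero. */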
6338 /* test reg64, imm32 */
6339 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6340 pbCodeBuf[off++] = 0xf7;
6341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6342 pbCodeBuf[off++] = 0;
6343 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6344 pbCodeBuf[off++] = 0xff;
6345 pbCodeBuf[off++] = 0xff;
6346
6347 /* je/jz +1 */
6348 pbCodeBuf[off++] = 0x74;
6349 pbCodeBuf[off++] = 0x01;
6350
6351 /* int3 */
6352 pbCodeBuf[off++] = 0xcc;
6353 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6354 }
6355 else
6356 {
6357 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6358 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6359             off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6360 }
6361
6362# elif defined(RT_ARCH_ARM64)
6363 /* mov TMP0, [gstreg] */
6364 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6365
6366 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6367 /* sub tmp0, tmp0, idxReg */
6368 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6369 /* cbz tmp0, +1 */
6370 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6371 /* brk #0x1000+enmGstReg */
6372 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6373 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6374
6375# else
6376# error "Port me!"
6377# endif
6378 return off;
6379}
6380
6381
6382# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6383/**
6384 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6385 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6386 * instruction if that's not the case.
6387 *
6388 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6389 * Trashes EFLAGS on AMD64.
6390 */
6391static uint32_t
6392iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6393 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6394{
6395# ifdef RT_ARCH_AMD64
6396# error "Port me!"
6397# elif defined(RT_ARCH_ARM64)
6398 /* mov vectmp0, [gstreg] */
6399 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6400
6401 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6402 {
6403 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6404 /* eor vectmp0, vectmp0, idxSimdReg */
6405 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6406 /* cnt vectmp0, vectmp0, #0*/
6407 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6408 /* umov tmp0, vectmp0.D[0] */
6409 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6410 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6411 /* cbz tmp0, +1 */
6412 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6413 /* brk #0x1000+enmGstReg */
6414 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6415 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6416 }
6417
6418 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6419 {
6420 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6421 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6422 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6423 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6424 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6425 /* umov tmp0, (vectmp0 + 1).D[0] */
6426 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6427 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6428 /* cbz tmp0, +1 */
6429 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6430 /* brk #0x1000+enmGstReg */
6431 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6432 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6433 }
6434
6435# else
6436# error "Port me!"
6437# endif
6438 return off;
6439}
6440# endif
6441#endif /* VBOX_STRICT */
6442
6443
6444#ifdef VBOX_STRICT
6445/**
6446 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6447 * important bits.
6448 *
6449 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6450 * Trashes EFLAGS on AMD64.
6451 */
6452static uint32_t
6453iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6454{
6455 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6456 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6457 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6458 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6459
6460 # ifdef RT_ARCH_AMD64
6461 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6462
6463 /* je/jz +1 */
6464 pbCodeBuf[off++] = 0x74;
6465 pbCodeBuf[off++] = 0x01;
6466
6467 /* int3 */
6468 pbCodeBuf[off++] = 0xcc;
6469
6470# elif defined(RT_ARCH_ARM64)
6471 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6472
6473 /* b.eq +1 */
6474 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6475 /* brk #0x2000 */
6476 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6477
6478# else
6479# error "Port me!"
6480# endif
6481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6482
6483 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6484 return off;
6485}
6486#endif /* VBOX_STRICT */
6487
6488
6489/**
6490 * Emits a code for checking the return code of a call and rcPassUp, returning
6491 * from the code if either are non-zero.
6492 */
6493DECL_HIDDEN_THROW(uint32_t)
6494iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6495{
6496#ifdef RT_ARCH_AMD64
6497 /*
6498 * AMD64: eax = call status code.
6499 */
6500
6501 /* edx = rcPassUp */
6502 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6503# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6504 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6505# endif
6506
6507 /* edx = eax | rcPassUp */
6508 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6509 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6510 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6511 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6512
6513 /* Jump to non-zero status return path. */
6514 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6515
6516 /* done. */
6517
6518 #elif defined(RT_ARCH_ARM64)
6519 /*
6520 * ARM64: w0 = call status code.
6521 */
6522# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6523 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6524# endif
6525 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6526
6527 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6528
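    /* x4 = w3 | w0 - combine rcPassUp and the call status; any non-zero bit sends us to the
       non-zero-status return path via the conditional branch below. */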
6529 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6530
6531 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6532 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6533 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6534
6535#else
6536# error "port me"
6537#endif
6538 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6539 RT_NOREF_PV(idxInstr);
6540 return off;
6541}
6542
6543
6544/**
6545 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6546 * raising a \#GP(0) if it isn't.
6547 *
6548  * @returns New code buffer offset; throws VBox status code on failure.
6549 * @param pReNative The native recompile state.
6550 * @param off The code buffer offset.
6551 * @param idxAddrReg The host register with the address to check.
6552 * @param idxInstr The current instruction.
6553 */
6554DECL_HIDDEN_THROW(uint32_t)
6555iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6556{
6557 /*
6558 * Make sure we don't have any outstanding guest register writes as we may
6559 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
6560 */
6561 off = iemNativeRegFlushPendingWrites(pReNative, off);
6562
6563#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6564 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6565#else
6566 RT_NOREF(idxInstr);
6567#endif
6568
6569#ifdef RT_ARCH_AMD64
6570 /*
6571 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6572 * return raisexcpt();
6573      * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6574 */
6575 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6576
6577 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6578 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6579 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6580 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6581 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6582
6583 iemNativeRegFreeTmp(pReNative, iTmpReg);
6584
6585#elif defined(RT_ARCH_ARM64)
6586 /*
6587 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6588 * return raisexcpt();
6589 * ----
6590 * mov x1, 0x800000000000
6591 * add x1, x0, x1
6592 * cmp xzr, x1, lsr 48
6593 * b.ne .Lraisexcpt
6594 */
6595 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6596
6597 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6598 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6599 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6600 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6601
6602 iemNativeRegFreeTmp(pReNative, iTmpReg);
6603
6604#else
6605# error "Port me"
6606#endif
6607 return off;
6608}
6609
6610
6611/**
6612  * Emits code to check that the content of @a idxAddrReg is within the limit
6613 * of CS, raising a \#GP(0) if it isn't.
6614 *
6615 * @returns New code buffer offset; throws VBox status code on error.
6616 * @param pReNative The native recompile state.
6617 * @param off The code buffer offset.
6618 * @param idxAddrReg The host register (32-bit) with the address to
6619 * check.
6620 * @param idxInstr The current instruction.
6621 */
6622DECL_HIDDEN_THROW(uint32_t)
6623iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6624 uint8_t idxAddrReg, uint8_t idxInstr)
6625{
6626 /*
6627 * Make sure we don't have any outstanding guest register writes as we may
6628 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
6629 */
6630 off = iemNativeRegFlushPendingWrites(pReNative, off);
6631
6632#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6633 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6634#else
6635 RT_NOREF(idxInstr);
6636#endif
6637
6638 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6639 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6640 kIemNativeGstRegUse_ReadOnly);
6641
6642 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6643 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6644
6645 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6646 return off;
6647}
6648
6649
6650/**
6651 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
6652 *
6653 * @returns The flush mask.
6654 * @param fCImpl The IEM_CIMPL_F_XXX flags.
6655 * @param fGstShwFlush The starting flush mask.
6656 */
6657DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
6658{
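    /* A far branch may load a new CS, so its selector/base/limit shadows go stale; far stack
       switches do the same for SS (and RSP), while flag modifying helpers invalidate EFLAGS. */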
6659 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
6660 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
6661 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
6662 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
6663 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
6664 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
6665 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
6666 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
6667 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
6668 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
6669 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
6670 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
6671 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
6672 return fGstShwFlush;
6673}
6674
6675
6676/**
6677 * Emits a call to a CImpl function or something similar.
6678 */
6679DECL_HIDDEN_THROW(uint32_t)
6680iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6681 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6682{
6683 /* Writeback everything. */
6684 off = iemNativeRegFlushPendingWrites(pReNative, off);
6685
6686 /*
6687      * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6688 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6689 */
6690 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6691 fGstShwFlush
6692 | RT_BIT_64(kIemNativeGstReg_Pc)
6693 | RT_BIT_64(kIemNativeGstReg_EFlags));
6694 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6695
6696 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6697
6698 /*
6699 * Load the parameters.
6700 */
6701#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6702     /* Special-case the hidden VBOXSTRICTRC return pointer. */
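    /* With VBOXSTRICTRC as a class on Windows the return value is passed back via a hidden
       buffer pointer in the first argument register, so pVCpu and friends shift down by one. */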
6703 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6704 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6705 if (cAddParams > 0)
6706 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6707 if (cAddParams > 1)
6708 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6709 if (cAddParams > 2)
6710 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6711 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6712
6713#else
6714 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6715 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6716 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6717 if (cAddParams > 0)
6718 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6719 if (cAddParams > 1)
6720 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6721 if (cAddParams > 2)
6722# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6723 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6724# else
6725 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6726# endif
6727#endif
6728
6729 /*
6730 * Make the call.
6731 */
6732 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6733
6734#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6735 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6736#endif
6737
6738 /*
6739 * Check the status code.
6740 */
6741 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6742}
6743
6744
6745/**
6746 * Emits a call to a threaded worker function.
6747 */
6748DECL_HIDDEN_THROW(uint32_t)
6749iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6750{
6751 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6752 off = iemNativeRegFlushPendingWrites(pReNative, off);
6753
6754 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6755 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6756
6757#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6758 /* The threaded function may throw / long jmp, so set current instruction
6759 number if we're counting. */
6760 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6761#endif
6762
6763 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6764
6765#ifdef RT_ARCH_AMD64
6766 /* Load the parameters and emit the call. */
6767# ifdef RT_OS_WINDOWS
6768# ifndef VBOXSTRICTRC_STRICT_ENABLED
6769 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6770 if (cParams > 0)
6771 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6772 if (cParams > 1)
6773 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6774 if (cParams > 2)
6775 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6776# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6777 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6778 if (cParams > 0)
6779 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6780 if (cParams > 1)
6781 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6782 if (cParams > 2)
6783 {
6784 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6785 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6786 }
6787 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6788# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6789# else
6790 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6791 if (cParams > 0)
6792 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6793 if (cParams > 1)
6794 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6795 if (cParams > 2)
6796 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6797# endif
6798
6799 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6800
6801# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6802 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6803# endif
6804
6805 #elif defined(RT_ARCH_ARM64)
6806 /*
6807 * ARM64:
6808 */
6809 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6810 if (cParams > 0)
6811 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6812 if (cParams > 1)
6813 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6814 if (cParams > 2)
6815 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6816
6817 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6818
6819#else
6820# error "port me"
6821#endif
6822
6823 /*
6824 * Check the status code.
6825 */
6826 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6827
6828 return off;
6829}
6830
6831#ifdef VBOX_WITH_STATISTICS
6832/**
6833 * Emits code to update the thread call statistics.
6834 */
6835DECL_INLINE_THROW(uint32_t)
6836iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6837{
6838 /*
6839 * Update threaded function stats.
6840 */
6841 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6842 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
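    /* The ARM64 emitter needs two temporary GPRs for the memory increment; passing UINT8_MAX
       on AMD64 presumably means no temporaries are required (inc dword [mem] can be used). */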
6843# if defined(RT_ARCH_ARM64)
6844 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6845 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6846 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6847 iemNativeRegFreeTmp(pReNative, idxTmp1);
6848 iemNativeRegFreeTmp(pReNative, idxTmp2);
6849# else
6850 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6851# endif
6852 return off;
6853}
6854#endif /* VBOX_WITH_STATISTICS */
6855
6856
6857/**
6858 * Emits the code at the CheckBranchMiss label.
6859 */
6860static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6861{
6862 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6863 if (idxLabel != UINT32_MAX)
6864 {
6865 iemNativeLabelDefine(pReNative, idxLabel, off);
6866
6867 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6868 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6869 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6870
6871 /* jump back to the return sequence. */
6872 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6873 }
6874 return off;
6875}
6876
6877
6878/**
6879 * Emits the code at the NeedCsLimChecking label.
6880 */
6881static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6882{
6883 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6884 if (idxLabel != UINT32_MAX)
6885 {
6886 iemNativeLabelDefine(pReNative, idxLabel, off);
6887
6888 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6889 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6890 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6891
6892 /* jump back to the return sequence. */
6893 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6894 }
6895 return off;
6896}
6897
6898
6899/**
6900 * Emits the code at the ObsoleteTb label.
6901 */
6902static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6903{
6904 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6905 if (idxLabel != UINT32_MAX)
6906 {
6907 iemNativeLabelDefine(pReNative, idxLabel, off);
6908
6909 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6910 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6911 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6912
6913 /* jump back to the return sequence. */
6914 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6915 }
6916 return off;
6917}
6918
6919
6920/**
6921 * Emits the code at the RaiseGP0 label.
6922 */
6923static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6924{
6925 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
6926 if (idxLabel != UINT32_MAX)
6927 {
6928 iemNativeLabelDefine(pReNative, idxLabel, off);
6929
6930 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
6931 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6932 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
6933
6934 /* jump back to the return sequence. */
6935 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6936 }
6937 return off;
6938}
6939
6940
6941/**
6942 * Emits the code at the RaiseNm label.
6943 */
6944static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6945{
6946 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
6947 if (idxLabel != UINT32_MAX)
6948 {
6949 iemNativeLabelDefine(pReNative, idxLabel, off);
6950
6951 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
6952 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6953 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
6954
6955 /* jump back to the return sequence. */
6956 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6957 }
6958 return off;
6959}
6960
6961
6962/**
6963 * Emits the code at the RaiseUd label.
6964 */
6965static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6966{
6967 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
6968 if (idxLabel != UINT32_MAX)
6969 {
6970 iemNativeLabelDefine(pReNative, idxLabel, off);
6971
6972 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
6973 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6974 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
6975
6976 /* jump back to the return sequence. */
6977 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6978 }
6979 return off;
6980}
6981
6982
6983/**
6984 * Emits the code at the RaiseMf label.
6985 */
6986static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6987{
6988 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
6989 if (idxLabel != UINT32_MAX)
6990 {
6991 iemNativeLabelDefine(pReNative, idxLabel, off);
6992
6993 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
6994 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6995 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
6996
6997 /* jump back to the return sequence. */
6998 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6999 }
7000 return off;
7001}
7002
7003
7004/**
7005 * Emits the code at the RaiseXf label.
7006 */
7007static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7008{
7009 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
7010 if (idxLabel != UINT32_MAX)
7011 {
7012 iemNativeLabelDefine(pReNative, idxLabel, off);
7013
7014 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
7015 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7016 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
7017
7018 /* jump back to the return sequence. */
7019 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7020 }
7021 return off;
7022}
7023
7024
7025/**
7026 * Emits the code at the ReturnWithFlags label (returns
7027 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7028 */
7029static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7030{
7031 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7032 if (idxLabel != UINT32_MAX)
7033 {
7034 iemNativeLabelDefine(pReNative, idxLabel, off);
7035
7036 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7037
7038 /* jump back to the return sequence. */
7039 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7040 }
7041 return off;
7042}
7043
7044
7045/**
7046 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7047 */
7048static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7049{
7050 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7051 if (idxLabel != UINT32_MAX)
7052 {
7053 iemNativeLabelDefine(pReNative, idxLabel, off);
7054
7055 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7056
7057 /* jump back to the return sequence. */
7058 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7059 }
7060 return off;
7061}
7062
7063
7064/**
7065 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7066 */
7067static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7068{
7069 /*
7070 * Generate the rc + rcPassUp fiddling code if needed.
7071 */
7072 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7073 if (idxLabel != UINT32_MAX)
7074 {
7075 iemNativeLabelDefine(pReNative, idxLabel, off);
7076
7077 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
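 /* Argument marshalling for the helper call, roughly (see the emits just below):
  *    Win64 amd64:  rcx = pVCpu, rdx = rc (from rax), r8 = idxInstr (from rcx);
  *    SysV amd64:   rdi = pVCpu, rsi = rc (from rax), rdx = idxInstr (from rcx);
  *    other hosts:  IEMNATIVE_CALL_ARG0_GREG = pVCpu, ARG1 = rc taken from the
  *                  return register, and ARG2 is expected to hold idxInstr already. */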
7078#ifdef RT_ARCH_AMD64
7079# ifdef RT_OS_WINDOWS
7080# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7081 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7082# endif
7083 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7084 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7085# else
7086 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7087 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7088# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7089 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7090# endif
7091# endif
7092# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7093 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7094# endif
7095
7096#else
7097 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7098 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7099 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7100#endif
7101
7102 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7103 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7104 }
7105 return off;
7106}
7107
7108
7109/**
7110 * Emits a standard epilog.
7111 */
7112static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7113{
7114 *pidxReturnLabel = UINT32_MAX;
7115
7116 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7117 off = iemNativeRegFlushPendingWrites(pReNative, off);
7118
7119 /*
7120 * Successful return, so clear the return register (eax, w0).
7121 */
7122 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
7123
7124 /*
7125 * Define label for common return point.
7126 */
7127 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7128 *pidxReturnLabel = idxReturn;
7129
7130 /*
7131 * Restore registers and return.
7132 */
7133#ifdef RT_ARCH_AMD64
7134 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7135
7136 /* Reposition rsp at the r15 restore point. */
7137 pbCodeBuf[off++] = X86_OP_REX_W;
7138 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7139 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7140 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7141
7142 /* Pop non-volatile registers and return */
7143 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7144 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7145 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7146 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7147 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7148 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7149 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7150 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7151# ifdef RT_OS_WINDOWS
7152 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7153 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7154# endif
7155 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7156 pbCodeBuf[off++] = 0xc9; /* leave */
7157 pbCodeBuf[off++] = 0xc3; /* ret */
7158 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7159
7160#elif RT_ARCH_ARM64
7161 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7162
7163 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
7164 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7165 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7166 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7167 IEMNATIVE_FRAME_VAR_SIZE / 8);
7168 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7169 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7170 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7171 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7172 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7173 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7174 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7175 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7176 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7177 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7178 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7179 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7180
7181 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7182 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7183 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7184 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7185
7186 /* retab / ret */
7187# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7188 if (1)
7189 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7190 else
7191# endif
7192 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7193
7194#else
7195# error "port me"
7196#endif
7197 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7198
7199 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7200}
7201
7202
7203/**
7204 * Emits a standard prolog.
7205 */
7206static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7207{
7208#ifdef RT_ARCH_AMD64
7209 /*
7210 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7211 * reserving 64 bytes for stack variables plus 4 non-register argument
7212 * slots. Fixed register assignment: xBX = pVCpu.
7213 *
7214 * Since we always do the same register spilling, we can use the same
7215 * unwind description for all the code.
7216 */
7217 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7218 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7219 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7220 pbCodeBuf[off++] = 0x8b;
7221 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7222 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7223 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7224# ifdef RT_OS_WINDOWS
7225 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7226 pbCodeBuf[off++] = 0x8b;
7227 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7228 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7229 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7230# else
7231 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7232 pbCodeBuf[off++] = 0x8b;
7233 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7234# endif
7235 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7236 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7237 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7238 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7239 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7240 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7241 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7242 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7243
7244# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7245 /* Save the frame pointer. */
7246 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7247# endif
7248
7249 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7250 X86_GREG_xSP,
7251 IEMNATIVE_FRAME_ALIGN_SIZE
7252 + IEMNATIVE_FRAME_VAR_SIZE
7253 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7254 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7255 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7256 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7257 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7258
7259#elif RT_ARCH_ARM64
7260 /*
7261 * We set up a stack frame exactly like on x86, only we have to push the
7262 * return address ourselves here. We save all non-volatile registers.
7263 */
7264 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7265
7266 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been unable
7267 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7268 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
7269 * in any way conditional, so just emit this instruction now and hope for the best... */
7270 /* pacibsp */
7271 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7272# endif
7273
7274 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7275 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7276 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7277 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7278 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7279 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7280 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7281 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7282 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7283 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7284 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7285 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7286 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7287 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7288 /* Save the BP and LR (ret address) registers at the top of the frame. */
7289 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7290 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7291 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7292 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7293 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7294 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7295
7296 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7297 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7298
7299 /* mov r28, r0 */
7300 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7301 /* mov r27, r1 */
7302 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7303
7304# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7305 /* Save the frame pointer. */
7306 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7307 ARMV8_A64_REG_X2);
7308# endif
7309
7310#else
7311# error "port me"
7312#endif
7313 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7314 return off;
7315}
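/* Sketch of the state after the prolog: the fixed register assignments are in
 * place - amd64 keeps pVCpu in rbx (IEMNATIVE_REG_FIXED_PVMCPU), while arm64
 * keeps pVCpu in x28 (IEMNATIVE_REG_FIXED_PVMCPU) and the CPUMCTX pointer in
 * x27 (IEMNATIVE_REG_FIXED_PCPUMCTX), as set up by the moves above. */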
7316
7317
7318
7319
7320/*********************************************************************************************************************************
7321* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
7322*********************************************************************************************************************************/
7323
7324#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
7325 { \
7326 Assert(pReNative->Core.bmVars == 0); \
7327 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
7328 Assert(pReNative->Core.bmStack == 0); \
7329 pReNative->fMc = (a_fMcFlags); \
7330 pReNative->fCImpl = (a_fCImplFlags); \
7331 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
7332
7333/** We have to get to the end in recompilation mode, as otherwise we won't
7334 * generate code for all the IEM_MC_IF_XXX branches. */
7335#define IEM_MC_END() \
7336 iemNativeVarFreeAll(pReNative); \
7337 } return off
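/* Illustrative use only (hypothetical MC block, not taken from the generated
 * sources): a recompiled instruction body brackets its micro-ops with these
 * macros, e.g.
 *     IEM_MC_BEGIN(0, 0, IEM_MC_F_MIN_386, 0);
 *     IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(2, VINF_SUCCESS);
 *     IEM_MC_END();
 * so that the begin/end pair can set up and tear down the variable state. */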
7338
7339
7340
7341/*********************************************************************************************************************************
7342* Native Emitter Support. *
7343*********************************************************************************************************************************/
7344
7345
7346#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
7347
7348#define IEM_MC_NATIVE_ELSE() } else {
7349
7350#define IEM_MC_NATIVE_ENDIF() } ((void)0)
7351
7352
7353#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
7354 off = a_fnEmitter(pReNative, off)
7355
7356#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
7357 off = a_fnEmitter(pReNative, off, (a0))
7358
7359#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
7360 off = a_fnEmitter(pReNative, off, (a0), (a1))
7361
7362#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
7363 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
7364
7365#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
7366 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
7367
7368#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
7369 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
7370
7371#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
7372 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
7373
7374#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
7375 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
7376
7377#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
7378 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
7379
7380
7381
7382/*********************************************************************************************************************************
7383* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
7384*********************************************************************************************************************************/
7385
7386#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
7387 pReNative->fMc = 0; \
7388 pReNative->fCImpl = (a_fFlags); \
7389 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
7390
7391
7392#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7393 pReNative->fMc = 0; \
7394 pReNative->fCImpl = (a_fFlags); \
7395 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
7396
7397DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7398 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7399 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
7400{
7401 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
7402}
7403
7404
7405#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7406 pReNative->fMc = 0; \
7407 pReNative->fCImpl = (a_fFlags); \
7408 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7409 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
7410
7411DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7412 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7413 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
7414{
7415 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
7416}
7417
7418
7419#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7420 pReNative->fMc = 0; \
7421 pReNative->fCImpl = (a_fFlags); \
7422 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7423 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
7424
7425DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7426 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7427 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
7428 uint64_t uArg2)
7429{
7430 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
7431}
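/* Note: all the IEM_MC_DEFER_TO_CIMPL_X_RET_THREADED variants funnel into the
 * common iemNativeEmitCImplCall worker; unused uArgN slots are simply passed as 0. */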
7432
7433
7434
7435/*********************************************************************************************************************************
7436* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
7437*********************************************************************************************************************************/
7438
7439/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
7440 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
7441DECL_INLINE_THROW(uint32_t)
7442iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7443{
7444 /*
7445 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
7446 * return with special status code and make the execution loop deal with
7447 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
7448 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
7449 * could continue w/o interruption, it probably will drop into the
7450 * debugger, so it's not worth the effort of trying to service it here and we
7451 * just lump it in with the handling of the others.
7452 *
7453 * To simplify the code and the register state management even more (wrt
7454 * immediate in AND operation), we always update the flags and skip the
7455 * extra check and its associated conditional jump.
7456 */
7457 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
7458 <= UINT32_MAX);
7459#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7460 AssertMsg( pReNative->idxCurCall == 0
7461 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
7462 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
7463#endif
7464
7465 /*
7466 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
7467 * any pending register writes must be flushed.
7468 */
7469 off = iemNativeRegFlushPendingWrites(pReNative, off);
7470
7471 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7472 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
7473 true /*fSkipLivenessAssert*/);
7474 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
7475 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
7476 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
7477 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
7478 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
7479
7480 /* Free but don't flush the EFLAGS register. */
7481 iemNativeRegFreeTmp(pReNative, idxEflReg);
7482
7483 return off;
7484}
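/* Rough C equivalent of the code emitted above (a sketch, not the literal emitted code):
 *     uint32_t fEfl = pVCpu->cpum.GstCtx.eflags.u;
 *     if (fEfl & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *         return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;  // via the ReturnWithFlags label
 *     pVCpu->cpum.GstCtx.eflags.u = fEfl & ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 */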
7485
7486
7487 /** Finishing helper: a no-op (dummy) for a_rcNormal == VINF_SUCCESS, otherwise emits the ReturnBreak sequence. */
7488template<int const a_rcNormal>
7489DECL_FORCE_INLINE(uint32_t)
7490iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7491{
7492 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
7493 if (a_rcNormal != VINF_SUCCESS)
7494 {
7495#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7496 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7497#else
7498 RT_NOREF_PV(idxInstr);
7499#endif
7500
7501 /* As this code returns from the TB any pending register writes must be flushed. */
7502 off = iemNativeRegFlushPendingWrites(pReNative, off);
7503
7504 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
7505 }
7506 return off;
7507}
7508
7509
7510#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
7511 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7512 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7513
7514#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7515 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7516 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7517 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7518
7519/** Same as iemRegAddToRip64AndFinishingNoFlags. */
7520DECL_INLINE_THROW(uint32_t)
7521iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7522{
7523#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7524# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7525 if (!pReNative->Core.offPc)
7526 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7527# endif
7528
7529 /* Allocate a temporary PC register. */
7530 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7531
7532 /* Perform the addition and store the result. */
7533 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
7534 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7535
7536 /* Free but don't flush the PC register. */
7537 iemNativeRegFreeTmp(pReNative, idxPcReg);
7538#endif
7539
7540#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7541 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7542
7543 pReNative->Core.offPc += cbInstr;
7544# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7545 off = iemNativePcAdjustCheck(pReNative, off);
7546# endif
7547 if (pReNative->cCondDepth)
7548 off = iemNativeEmitPcWriteback(pReNative, off);
7549 else
7550 pReNative->Core.cInstrPcUpdateSkipped++;
7551#endif
7552
7553 return off;
7554}
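/* Note: with IEMNATIVE_WITH_DELAYED_PC_UPDATING the RIP increment is merely
 * accumulated in pReNative->Core.offPc here; the actual CPUMCTX write-back is
 * deferred to iemNativeEmitPcWriteback (forced above when inside a conditional),
 * which is what the cInstrPcUpdateSkipped counter above keeps track of. */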
7555
7556
7557#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
7558 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7559 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7560
7561#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7562 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7563 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7564 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7565
7566/** Same as iemRegAddToEip32AndFinishingNoFlags. */
7567DECL_INLINE_THROW(uint32_t)
7568iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7569{
7570#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7571# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7572 if (!pReNative->Core.offPc)
7573 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7574# endif
7575
7576 /* Allocate a temporary PC register. */
7577 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7578
7579 /* Perform the addition and store the result. */
7580 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7581 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7582
7583 /* Free but don't flush the PC register. */
7584 iemNativeRegFreeTmp(pReNative, idxPcReg);
7585#endif
7586
7587#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7588 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7589
7590 pReNative->Core.offPc += cbInstr;
7591# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7592 off = iemNativePcAdjustCheck(pReNative, off);
7593# endif
7594 if (pReNative->cCondDepth)
7595 off = iemNativeEmitPcWriteback(pReNative, off);
7596 else
7597 pReNative->Core.cInstrPcUpdateSkipped++;
7598#endif
7599
7600 return off;
7601}
7602
7603
7604#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
7605 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7606 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7607
7608#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7609 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7610 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7611 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7612
7613/** Same as iemRegAddToIp16AndFinishingNoFlags. */
7614DECL_INLINE_THROW(uint32_t)
7615iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7616{
7617#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7618# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7619 if (!pReNative->Core.offPc)
7620 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7621# endif
7622
7623 /* Allocate a temporary PC register. */
7624 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7625
7626 /* Perform the addition and store the result. */
7627 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7628 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7629 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7630
7631 /* Free but don't flush the PC register. */
7632 iemNativeRegFreeTmp(pReNative, idxPcReg);
7633#endif
7634
7635#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7636 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7637
7638 pReNative->Core.offPc += cbInstr;
7639# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7640 off = iemNativePcAdjustCheck(pReNative, off);
7641# endif
7642 if (pReNative->cCondDepth)
7643 off = iemNativeEmitPcWriteback(pReNative, off);
7644 else
7645 pReNative->Core.cInstrPcUpdateSkipped++;
7646#endif
7647
7648 return off;
7649}
7650
7651
7652
7653/*********************************************************************************************************************************
7654* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
7655*********************************************************************************************************************************/
7656
7657#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7658 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7659 (a_enmEffOpSize), pCallEntry->idxInstr); \
7660 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7661
7662#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7663 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7664 (a_enmEffOpSize), pCallEntry->idxInstr); \
7665 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7666 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7667
7668#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
7669 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7670 IEMMODE_16BIT, pCallEntry->idxInstr); \
7671 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7672
7673#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7674 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7675 IEMMODE_16BIT, pCallEntry->idxInstr); \
7676 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7677 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7678
7679#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
7680 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7681 IEMMODE_64BIT, pCallEntry->idxInstr); \
7682 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7683
7684#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7685 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7686 IEMMODE_64BIT, pCallEntry->idxInstr); \
7687 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7688 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7689
7690/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
7691 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
7692 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
7693DECL_INLINE_THROW(uint32_t)
7694iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7695 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7696{
7697 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
7698
7699 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7700 off = iemNativeRegFlushPendingWrites(pReNative, off);
7701
7702#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7703 Assert(pReNative->Core.offPc == 0);
7704
7705 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7706#endif
7707
7708 /* Allocate a temporary PC register. */
7709 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7710
7711 /* Perform the addition. */
7712 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
7713
7714 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
7715 {
7716 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
7717 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7718 }
7719 else
7720 {
7721 /* Just truncate the result to 16-bit IP. */
7722 Assert(enmEffOpSize == IEMMODE_16BIT);
7723 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7724 }
7725 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7726
7727 /* Free but don't flush the PC register. */
7728 iemNativeRegFreeTmp(pReNative, idxPcReg);
7729
7730 return off;
7731}
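/* Guest-visible effect, roughly (cf. iemRegRip64RelativeJumpS8AndFinishNoFlags):
 *     uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
 *     if (enmEffOpSize == IEMMODE_16BIT)
 *         uNewRip &= UINT16_MAX;
 *     else if (!IEM_IS_CANONICAL(uNewRip))
 *         return iemRaiseGeneralProtectionFault0(pVCpu);
 *     pVCpu->cpum.GstCtx.rip = uNewRip;
 */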
7732
7733
7734#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7735 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7736 (a_enmEffOpSize), pCallEntry->idxInstr); \
7737 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7738
7739#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7740 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7741 (a_enmEffOpSize), pCallEntry->idxInstr); \
7742 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7743 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7744
7745#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
7746 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7747 IEMMODE_16BIT, pCallEntry->idxInstr); \
7748 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7749
7750#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7751 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7752 IEMMODE_16BIT, pCallEntry->idxInstr); \
7753 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7754 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7755
7756#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
7757 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7758 IEMMODE_32BIT, pCallEntry->idxInstr); \
7759 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7760
7761#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7762 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7763 IEMMODE_32BIT, pCallEntry->idxInstr); \
7764 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7765 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7766
7767/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
7768 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
7769 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
7770DECL_INLINE_THROW(uint32_t)
7771iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7772 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7773{
7774 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
7775
7776 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7777 off = iemNativeRegFlushPendingWrites(pReNative, off);
7778
7779#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7780 Assert(pReNative->Core.offPc == 0);
7781
7782 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7783#endif
7784
7785 /* Allocate a temporary PC register. */
7786 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7787
7788 /* Perform the addition. */
7789 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7790
7791 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
7792 if (enmEffOpSize == IEMMODE_16BIT)
7793 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7794
7795 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
7796/** @todo we can skip this in 32-bit FLAT mode. */
7797 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7798
7799 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7800
7801 /* Free but don't flush the PC register. */
7802 iemNativeRegFreeTmp(pReNative, idxPcReg);
7803
7804 return off;
7805}
7806
7807
7808#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
7809 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7810 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7811
7812#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
7813 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7814 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7815 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7816
7817#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
7818 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7819 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7820
7821#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7822 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7823 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7824 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7825
7826#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
7827 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7828 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7829
7830#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7831 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7832 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7833 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7834
7835/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
7836DECL_INLINE_THROW(uint32_t)
7837iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7838 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
7839{
7840 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7841 off = iemNativeRegFlushPendingWrites(pReNative, off);
7842
7843#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7844 Assert(pReNative->Core.offPc == 0);
7845
7846 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7847#endif
7848
7849 /* Allocate a temporary PC register. */
7850 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7851
7852 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
7853 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7854 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7855 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7856 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7857
7858 /* Free but don't flush the PC register. */
7859 iemNativeRegFreeTmp(pReNative, idxPcReg);
7860
7861 return off;
7862}
7863
7864
7865
7866/*********************************************************************************************************************************
7867 * Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
7868*********************************************************************************************************************************/
7869
7870/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
7871#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
7872 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7873
7874/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
7875#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
7876 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7877
7878/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
7879#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
7880 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7881
7882/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
7883 * clears flags. */
7884#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
7885 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
7886 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7887
7888/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
7889 * clears flags. */
7890#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
7891 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
7892 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7893
7894/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
7895 * clears flags. */
7896#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
7897 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
7898 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7899
7900#undef IEM_MC_SET_RIP_U16_AND_FINISH
7901
7902
7903/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
7904#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
7905 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7906
7907/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
7908#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
7909 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7910
7911/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
7912 * clears flags. */
7913#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
7914 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
7915 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7916
7917/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
7918 * and clears flags. */
7919#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
7920 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
7921 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7922
7923#undef IEM_MC_SET_RIP_U32_AND_FINISH
7924
7925
7926/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
7927#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
7928 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
7929
7930/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
7931 * and clears flags. */
7932#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
7933 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
7934 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7935
7936#undef IEM_MC_SET_RIP_U64_AND_FINISH
7937
7938
7939/** Same as iemRegRipJumpU16AndFinishNoFlags,
7940 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
7941DECL_INLINE_THROW(uint32_t)
7942iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
7943 uint8_t idxInstr, uint8_t cbVar)
7944{
7945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
7946 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
7947
7948 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7949 off = iemNativeRegFlushPendingWrites(pReNative, off);
7950
7951#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7952 Assert(pReNative->Core.offPc == 0);
7953
7954 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7955#endif
7956
7957 /* Get a register with the new PC loaded from idxVarPc.
7958 Note! This ASSUMES that the high bits of the GPR are zeroed. */
7959 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
7960
7961 /* Check limit (may #GP(0) + exit TB). */
7962 if (!f64Bit)
7963/** @todo we can skip this test in FLAT 32-bit mode. */
7964 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7965 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
7966 else if (cbVar > sizeof(uint32_t))
7967 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7968
7969 /* Store the result. */
7970 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7971
7972 iemNativeVarRegisterRelease(pReNative, idxVarPc);
7973 /** @todo implicitly free the variable? */
7974
7975 return off;
7976}
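/* Guest-visible effect, roughly (cf. iemRegRipJumpU16AndFinishNoFlags and friends):
 *     if (!f64Bit && uNewPc > pVCpu->cpum.GstCtx.cs.u32Limit)
 *         return iemRaiseGeneralProtectionFault0(pVCpu);
 *     if (f64Bit && cbVar > sizeof(uint32_t) && !IEM_IS_CANONICAL(uNewPc))
 *         return iemRaiseGeneralProtectionFault0(pVCpu);
 *     pVCpu->cpum.GstCtx.rip = uNewPc;
 */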
7977
7978
7979
7980/*********************************************************************************************************************************
7981* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
7982*********************************************************************************************************************************/
7983
7984#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
7985 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
7986
7987/**
7988 * Emits code to check if a \#NM exception should be raised.
7989 *
7990 * @returns New code buffer offset, UINT32_MAX on failure.
7991 * @param pReNative The native recompile state.
7992 * @param off The code buffer offset.
7993 * @param idxInstr The current instruction.
7994 */
7995DECL_INLINE_THROW(uint32_t)
7996iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7997{
7998 /*
7999 * Make sure we don't have any outstanding guest register writes as we may
8000 * raise an #NM and all guest registers must be up to date in CPUMCTX.
8001 *
8002 * @todo r=aeichner Can we postpone this to the RaiseNm path?
8003 */
8004 off = iemNativeRegFlushPendingWrites(pReNative, off);
8005
8006#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8007 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8008#else
8009 RT_NOREF(idxInstr);
8010#endif
8011
8012 /* Allocate a temporary CR0 register. */
8013 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8014 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8015
8016 /*
8017 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
8018 * return raisexcpt();
8019 */
8020 /* Test and jump. */
8021 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
8022
8023 /* Free but don't flush the CR0 register. */
8024 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8025
8026 return off;
8027}
8028
8029
8030#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
8031 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
8032
8033/**
8034 * Emits code to check if a \#MF exception should be raised.
8035 *
8036 * @returns New code buffer offset, UINT32_MAX on failure.
8037 * @param pReNative The native recompile state.
8038 * @param off The code buffer offset.
8039 * @param idxInstr The current instruction.
8040 */
8041DECL_INLINE_THROW(uint32_t)
8042iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8043{
8044 /*
8045 * Make sure we don't have any outstanding guest register writes as we may
8046 * raise an #MF and all guest registers must be up to date in CPUMCTX.
8047 *
8048 * @todo r=aeichner Can we postpone this to the RaiseMf path?
8049 */
8050 off = iemNativeRegFlushPendingWrites(pReNative, off);
8051
8052#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8053 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8054#else
8055 RT_NOREF(idxInstr);
8056#endif
8057
8058 /* Allocate a temporary FSW register. */
8059 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
8060 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
8061
8062 /*
8063 * if ((FSW & X86_FSW_ES) != 0)
8064 * return raisexcpt();
8065 */
8066 /* Test and jump. */
8067 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
8068
8069 /* Free but don't flush the FSW register. */
8070 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
8071
8072 return off;
8073}
8074
8075
8076#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
8077 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8078
8079/**
8080 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
8081 *
8082 * @returns New code buffer offset, UINT32_MAX on failure.
8083 * @param pReNative The native recompile state.
8084 * @param off The code buffer offset.
8085 * @param idxInstr The current instruction.
8086 */
8087DECL_INLINE_THROW(uint32_t)
8088iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8089{
8090 /*
8091 * Make sure we don't have any outstanding guest register writes as we may
8092 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8093 *
8094 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8095 */
8096 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8097
8098#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8099 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8100#else
8101 RT_NOREF(idxInstr);
8102#endif
8103
8104 /* Allocate a temporary CR0 and CR4 register. */
8105 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8106 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8107 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8108 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8109
8110 /** @todo r=aeichner Optimize this more later to have fewer compares and branches,
8111 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
8112 * actual performance benefit first). */
8113 /*
8114 * if (cr0 & X86_CR0_EM)
8115 * return raisexcpt();
8116 */
8117 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_EM_BIT, idxLabelRaiseUd);
8118 /*
8119 * if (!(cr4 & X86_CR4_OSFXSR))
8120 * return raisexcpt();
8121 */
8122 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR_BIT, idxLabelRaiseUd);
8123 /*
8124 * if (cr0 & X86_CR0_TS)
8125 * return raisexcpt();
8126 */
8127 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8128
8129 /* Free but don't flush the CR0 and CR4 register. */
8130 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8131 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8132
8133 return off;
8134}
8135
8136
8137#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
8138 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8139
8140/**
8141 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
8142 *
8143 * @returns New code buffer offset, UINT32_MAX on failure.
8144 * @param pReNative The native recompile state.
8145 * @param off The code buffer offset.
8146 * @param idxInstr The current instruction.
8147 */
8148DECL_INLINE_THROW(uint32_t)
8149iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8150{
8151 /*
8152 * Make sure we don't have any outstanding guest register writes as we may
8153 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8154 *
8155 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8156 */
8157 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8158
8159#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8160 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8161#else
8162 RT_NOREF(idxInstr);
8163#endif
8164
8165 /* Allocate a temporary CR0, CR4 and XCR0 register. */
8166 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8167 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8168 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
8169 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8170 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8171
8172#if 1
8173 off = iemNativeEmitBrk(pReNative, off, 0x4223); /** @todo Test this when AVX gets actually available. */
8174#endif
8175
8176 /** @todo r=aeichner Optimize this more later to have less compares and branches,
8177 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h but check that it has some
8178 * actual performance benefit first). */
8179 /*
8180 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
8181 * return raisexcpt();
8182 */
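    /* E.g. XCR0=0x7 (X87|SSE|YMM) passes this check, while XCR0=0x3 (X87|SSE,
       i.e. YMM state not enabled by the OS) takes the RaiseUd path. */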
8183 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
8184    off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxXcr0Reg); /* idxRegTmp = xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE) */
8185 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
8186 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8187
8188 /*
8189 * if (!(cr4 & X86_CR4_OSXSAVE))
8190 * return raisexcpt();
8191 */
8192 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT, idxLabelRaiseUd);
8193 /*
8194 * if (cr0 & X86_CR0_TS)
8195 * return raisexcpt();
8196 */
8197 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8198
8199 /* Free but don't flush the CR0, CR4 and XCR0 register. */
8200 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8201 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8202 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
8203
8204 return off;
8205}
8206
8207
8208#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
8209 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
8210
8211/**
8212 * Emits code to raise a SIMD floating-point exception (either \#UD or \#XF).
8213 *
8214 * @returns New code buffer offset, UINT32_MAX on failure.
8215 * @param pReNative The native recompile state.
8216 * @param off The code buffer offset.
8217 * @param idxInstr The current instruction.
8218 */
8219DECL_INLINE_THROW(uint32_t)
8220iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8221{
8222 /*
8223 * Make sure we don't have any outstanding guest register writes as we may
8224     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
8225     *
8226     * @todo r=aeichner Can we postpone this to the RaiseXf/RaiseUd path?
8227 */
8228 off = iemNativeRegFlushPendingWrites(pReNative, off);
8229
8230#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8231 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8232#else
8233 RT_NOREF(idxInstr);
8234#endif
8235
8236 /* Allocate a temporary CR4 register. */
8237 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8238 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
8239 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8240
8241    /*
8242     * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
8243     *     return raisexcpt(); -- \#UD
8244     */
8245    off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseUd);
8246
8247    /* Otherwise CR4.OSXMMEXCPT is set, so raise the \#XF exception unconditionally. */
8248    off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseXf);
8249
8250 /* Free but don't flush the CR4 register. */
8251 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8252
8253 return off;
8254}
8255
8256
8257
8258/*********************************************************************************************************************************
8259* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
8260*********************************************************************************************************************************/
8261
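/*
 * Illustrative sketch (not a definitive reference) of how these emitters are
 * driven by the IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF macros defined below.
 * An MC block written as
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ... then statements ...
 *      } IEM_MC_ELSE() {
 *          ... else statements ...
 *      } IEM_MC_ENDIF();
 *
 * expands roughly to
 *
 *      off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); do { {
 *          ... then statements ...
 *      } } while (0); off = iemNativeEmitElse(pReNative, off); do { {
 *          ... else statements ...
 *      } } while (0); off = iemNativeEmitEndIf(pReNative, off);
 *
 * Each IF emitter pushes a condition stack entry (iemNativeCondPushIf), emits
 * the test with a jump to the else-label, and snapshots the register/variable
 * state; iemNativeEmitElse and iemNativeEmitEndIf define the labels and merge
 * the states again.
 */
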
8262/**
8263 * Pushes an IEM_MC_IF_XXX onto the condition stack.
8264 *
8265 * @returns Pointer to the condition stack entry on success; throws
8266 *          VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting is too deep.
8267 */
8268DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
8269{
8270#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8271 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
8272#endif
8273
8274 uint32_t const idxStack = pReNative->cCondDepth;
8275 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
8276
8277 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
8278 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
8279
8280 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
8281 pEntry->fInElse = false;
8282 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
8283 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
8284
8285 return pEntry;
8286}
8287
8288
8289/**
8290 * Start of the if-block, snapshotting the register and variable state.
8291 */
8292DECL_INLINE_THROW(void)
8293iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
8294{
8295 Assert(offIfBlock != UINT32_MAX);
8296 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8297 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8298 Assert(!pEntry->fInElse);
8299
8300    /* Define the start of the IF block if requested or for disassembly purposes. */
8301 if (idxLabelIf != UINT32_MAX)
8302 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
8303#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8304 else
8305 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
8306#else
8307 RT_NOREF(offIfBlock);
8308#endif
8309
8310#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8311 Assert(pReNative->Core.offPc == 0);
8312#endif
8313
8314 /* Copy the initial state so we can restore it in the 'else' block. */
8315 pEntry->InitialState = pReNative->Core;
8316}
8317
8318
8319#define IEM_MC_ELSE() } while (0); \
8320 off = iemNativeEmitElse(pReNative, off); \
8321 do {
8322
8323/** Emits code related to IEM_MC_ELSE. */
8324DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8325{
8326 /* Check sanity and get the conditional stack entry. */
8327 Assert(off != UINT32_MAX);
8328 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8329 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8330 Assert(!pEntry->fInElse);
8331
8332 /* Jump to the endif */
8333 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
8334
8335 /* Define the else label and enter the else part of the condition. */
8336 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8337 pEntry->fInElse = true;
8338
8339#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8340 Assert(pReNative->Core.offPc == 0);
8341#endif
8342
8343 /* Snapshot the core state so we can do a merge at the endif and restore
8344 the snapshot we took at the start of the if-block. */
8345 pEntry->IfFinalState = pReNative->Core;
8346 pReNative->Core = pEntry->InitialState;
8347
8348 return off;
8349}
8350
8351
8352#define IEM_MC_ENDIF() } while (0); \
8353 off = iemNativeEmitEndIf(pReNative, off)
8354
8355/** Emits code related to IEM_MC_ENDIF. */
8356DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8357{
8358 /* Check sanity and get the conditional stack entry. */
8359 Assert(off != UINT32_MAX);
8360 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8361 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8362
8363#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8364 Assert(pReNative->Core.offPc == 0);
8365#endif
8366
8367 /*
8368     * Now we have to find common ground between the core state at the end of the
8369     * if/else block and the other branch.  Use the smallest common denominator and
8370     * just drop anything that isn't the same in both states.
8371 */
8372 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
8373 * which is why we're doing this at the end of the else-block.
8374     * But we'd need more info about the future for that to be worth the effort. */
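    /* When we're in the else part, we merge against the state captured at the end
       of the if-block; otherwise (no else) we merge against the state snapshotted
       at the start of the if-block. */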
8375 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
8376 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
8377 {
8378        /* Shadowed guest registers first. */
8379 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
8380 if (fGstRegs)
8381 {
8382 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
8383 do
8384 {
8385 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
8386 fGstRegs &= ~RT_BIT_64(idxGstReg);
8387
8388 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
8389 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
8390 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
8391 {
8392 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
8393 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
8394 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
8395 }
8396 } while (fGstRegs);
8397 }
8398 else
8399 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
8400
8401        /* Check variables next. For now we require them to be identical,
8402           or something we can recreate. */
8403 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
8404 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
8405 if (fVars)
8406 {
8407 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
8408 do
8409 {
8410 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
8411 fVars &= ~RT_BIT_32(idxVar);
8412
8413 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
8414 {
8415 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
8416 continue;
8417 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
8418 {
8419 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8420 if (idxHstReg != UINT8_MAX)
8421 {
8422 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8423 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8424 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
8425 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8426 }
8427 continue;
8428 }
8429 }
8430 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
8431 continue;
8432
8433 /* Irreconcilable, so drop it. */
8434 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8435 if (idxHstReg != UINT8_MAX)
8436 {
8437 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8438 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8439 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
8440 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8441 }
8442 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8443 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8444 } while (fVars);
8445 }
8446
8447        /* Finally, check that the host register allocations match. */
8448 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
8449 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
8450 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
8451 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
8452 }
8453
8454 /*
8455 * Define the endif label and maybe the else one if we're still in the 'if' part.
8456 */
8457 if (!pEntry->fInElse)
8458 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8459 else
8460 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
8461 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
8462
8463    /* Pop the conditional stack. */
8464 pReNative->cCondDepth -= 1;
8465
8466 return off;
8467}
8468
8469
8470#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
8471 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
8472 do {
8473
8474/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
8475DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8476{
8477 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8478
8479 /* Get the eflags. */
8480 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8481 kIemNativeGstRegUse_ReadOnly);
8482
8483 /* Test and jump. */
8484 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8485
8486 /* Free but don't flush the EFlags register. */
8487 iemNativeRegFreeTmp(pReNative, idxEflReg);
8488
8489 /* Make a copy of the core state now as we start the if-block. */
8490 iemNativeCondStartIfBlock(pReNative, off);
8491
8492 return off;
8493}
8494
8495
8496#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
8497 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
8498 do {
8499
8500/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
8501DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8502{
8503 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8504
8505 /* Get the eflags. */
8506 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8507 kIemNativeGstRegUse_ReadOnly);
8508
8509 /* Test and jump. */
8510 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8511
8512 /* Free but don't flush the EFlags register. */
8513 iemNativeRegFreeTmp(pReNative, idxEflReg);
8514
8515 /* Make a copy of the core state now as we start the if-block. */
8516 iemNativeCondStartIfBlock(pReNative, off);
8517
8518 return off;
8519}
8520
8521
8522#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
8523 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
8524 do {
8525
8526/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
8527DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8528{
8529 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8530
8531 /* Get the eflags. */
8532 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8533 kIemNativeGstRegUse_ReadOnly);
8534
8535 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8536 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
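    /* E.g. X86_EFL_ZF is RT_BIT_32(6), so ASMBitFirstSetU32 returns 7 and iBitNo becomes 6. */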
8537
8538 /* Test and jump. */
8539 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8540
8541 /* Free but don't flush the EFlags register. */
8542 iemNativeRegFreeTmp(pReNative, idxEflReg);
8543
8544 /* Make a copy of the core state now as we start the if-block. */
8545 iemNativeCondStartIfBlock(pReNative, off);
8546
8547 return off;
8548}
8549
8550
8551#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
8552 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
8553 do {
8554
8555/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
8556DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8557{
8558 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8559
8560 /* Get the eflags. */
8561 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8562 kIemNativeGstRegUse_ReadOnly);
8563
8564 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8565 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8566
8567 /* Test and jump. */
8568 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8569
8570 /* Free but don't flush the EFlags register. */
8571 iemNativeRegFreeTmp(pReNative, idxEflReg);
8572
8573 /* Make a copy of the core state now as we start the if-block. */
8574 iemNativeCondStartIfBlock(pReNative, off);
8575
8576 return off;
8577}
8578
8579
8580#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
8581 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
8582 do {
8583
8584#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
8585 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
8586 do {
8587
8588/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
8589DECL_INLINE_THROW(uint32_t)
8590iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8591 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8592{
8593 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8594
8595 /* Get the eflags. */
8596 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8597 kIemNativeGstRegUse_ReadOnly);
8598
8599 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8600 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8601
8602 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8603 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8604 Assert(iBitNo1 != iBitNo2);
8605
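    /* Same trick on both hosts: isolate bit #1, shift it into bit #2's position and
       XOR it with EFLAGS; bit #2 of the temporary is then set exactly when the two
       flags differ.  E.g. for SF (bit 7) vs OF (bit 11):
            tmp = efl & RT_BIT_32(7); tmp <<= 4; tmp ^= efl;  ->  tmp bit 11 = SF ^ OF */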
8606#ifdef RT_ARCH_AMD64
8607 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
8608
8609 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8610 if (iBitNo1 > iBitNo2)
8611 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8612 else
8613 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8614 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8615
8616#elif defined(RT_ARCH_ARM64)
8617 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8618 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8619
8620 /* and tmpreg, eflreg, #1<<iBitNo1 */
8621 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8622
8623    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8624 if (iBitNo1 > iBitNo2)
8625 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8626 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8627 else
8628 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8629 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8630
8631 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8632
8633#else
8634# error "Port me"
8635#endif
8636
8637 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8638 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8639 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8640
8641 /* Free but don't flush the EFlags and tmp registers. */
8642 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8643 iemNativeRegFreeTmp(pReNative, idxEflReg);
8644
8645 /* Make a copy of the core state now as we start the if-block. */
8646 iemNativeCondStartIfBlock(pReNative, off);
8647
8648 return off;
8649}
8650
8651
8652#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
8653 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
8654 do {
8655
8656#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
8657 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
8658 do {
8659
8660/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
8661 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
8662DECL_INLINE_THROW(uint32_t)
8663iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
8664 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8665{
8666 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8667
8668    /* We need an if-block label for the inverted variant. */
8669 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
8670 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
8671
8672 /* Get the eflags. */
8673 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8674 kIemNativeGstRegUse_ReadOnly);
8675
8676 /* Translate the flag masks to bit numbers. */
8677 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8678 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8679
8680 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8681 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8682 Assert(iBitNo1 != iBitNo);
8683
8684 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8685 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8686 Assert(iBitNo2 != iBitNo);
8687 Assert(iBitNo2 != iBitNo1);
8688
8689#ifdef RT_ARCH_AMD64
8690 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
8691#elif defined(RT_ARCH_ARM64)
8692 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8693#endif
8694
8695 /* Check for the lone bit first. */
8696 if (!fInverted)
8697 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8698 else
8699 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
8700
8701 /* Then extract and compare the other two bits. */
8702#ifdef RT_ARCH_AMD64
8703 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8704 if (iBitNo1 > iBitNo2)
8705 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8706 else
8707 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8708 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8709
8710#elif defined(RT_ARCH_ARM64)
8711 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8712
8713 /* and tmpreg, eflreg, #1<<iBitNo1 */
8714 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8715
8716    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8717 if (iBitNo1 > iBitNo2)
8718 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8719 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8720 else
8721 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8722 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8723
8724 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8725
8726#else
8727# error "Port me"
8728#endif
8729
8730 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8731 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8732 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8733
8734 /* Free but don't flush the EFlags and tmp registers. */
8735 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8736 iemNativeRegFreeTmp(pReNative, idxEflReg);
8737
8738 /* Make a copy of the core state now as we start the if-block. */
8739 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
8740
8741 return off;
8742}
8743
8744
8745#define IEM_MC_IF_CX_IS_NZ() \
8746 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
8747 do {
8748
8749/** Emits code for IEM_MC_IF_CX_IS_NZ. */
8750DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8751{
8752 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8753
8754 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8755 kIemNativeGstRegUse_ReadOnly);
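    /* Only the low 16 bits (CX) of RCX matter here, hence the UINT16_MAX test mask. */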
8756 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
8757 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8758
8759 iemNativeCondStartIfBlock(pReNative, off);
8760 return off;
8761}
8762
8763
8764#define IEM_MC_IF_ECX_IS_NZ() \
8765 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
8766 do {
8767
8768#define IEM_MC_IF_RCX_IS_NZ() \
8769 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
8770 do {
8771
8772/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
8773DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8774{
8775 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8776
8777 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8778 kIemNativeGstRegUse_ReadOnly);
8779 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
8780 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8781
8782 iemNativeCondStartIfBlock(pReNative, off);
8783 return off;
8784}
8785
8786
8787#define IEM_MC_IF_CX_IS_NOT_ONE() \
8788 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
8789 do {
8790
8791/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
8792DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8793{
8794 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8795
8796 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8797 kIemNativeGstRegUse_ReadOnly);
8798#ifdef RT_ARCH_AMD64
8799 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8800#else
8801 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8802 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8803 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8804#endif
8805 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8806
8807 iemNativeCondStartIfBlock(pReNative, off);
8808 return off;
8809}
8810
8811
8812#define IEM_MC_IF_ECX_IS_NOT_ONE() \
8813 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
8814 do {
8815
8816#define IEM_MC_IF_RCX_IS_NOT_ONE() \
8817 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
8818 do {
8819
8820/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
8821DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8822{
8823 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8824
8825 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8826 kIemNativeGstRegUse_ReadOnly);
8827 if (f64Bit)
8828 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8829 else
8830 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8831 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8832
8833 iemNativeCondStartIfBlock(pReNative, off);
8834 return off;
8835}
8836
8837
8838#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8839 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
8840 do {
8841
8842#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8843 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
8844 do {
8845
8846/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
8847 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8848DECL_INLINE_THROW(uint32_t)
8849iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
8850{
8851 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8852
8853 /* We have to load both RCX and EFLAGS before we can start branching,
8854 otherwise we'll end up in the else-block with an inconsistent
8855 register allocator state.
8856 Doing EFLAGS first as it's more likely to be loaded, right? */
8857 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8858 kIemNativeGstRegUse_ReadOnly);
8859 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8860 kIemNativeGstRegUse_ReadOnly);
8861
8862 /** @todo we could reduce this to a single branch instruction by spending a
8863 * temporary register and some setnz stuff. Not sure if loops are
8864 * worth it. */
8865 /* Check CX. */
8866#ifdef RT_ARCH_AMD64
8867 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8868#else
8869 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8870 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8871 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8872#endif
8873
8874 /* Check the EFlags bit. */
8875 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8876 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8877 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8878 !fCheckIfSet /*fJmpIfSet*/);
8879
8880 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8881 iemNativeRegFreeTmp(pReNative, idxEflReg);
8882
8883 iemNativeCondStartIfBlock(pReNative, off);
8884 return off;
8885}
8886
8887
8888#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8889 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
8890 do {
8891
8892#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8893 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
8894 do {
8895
8896#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8897 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
8898 do {
8899
8900#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8901 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
8902 do {
8903
8904/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
8905 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
8906 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
8907 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8908DECL_INLINE_THROW(uint32_t)
8909iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8910 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
8911{
8912 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8913
8914 /* We have to load both RCX and EFLAGS before we can start branching,
8915 otherwise we'll end up in the else-block with an inconsistent
8916 register allocator state.
8917 Doing EFLAGS first as it's more likely to be loaded, right? */
8918 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8919 kIemNativeGstRegUse_ReadOnly);
8920 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8921 kIemNativeGstRegUse_ReadOnly);
8922
8923 /** @todo we could reduce this to a single branch instruction by spending a
8924 * temporary register and some setnz stuff. Not sure if loops are
8925 * worth it. */
8926 /* Check RCX/ECX. */
8927 if (f64Bit)
8928 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8929 else
8930 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8931
8932 /* Check the EFlags bit. */
8933 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8934 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8935 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8936 !fCheckIfSet /*fJmpIfSet*/);
8937
8938 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8939 iemNativeRegFreeTmp(pReNative, idxEflReg);
8940
8941 iemNativeCondStartIfBlock(pReNative, off);
8942 return off;
8943}
8944
8945
8946
8947/*********************************************************************************************************************************
8948* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
8949*********************************************************************************************************************************/
8950/** Number of hidden arguments for CIMPL calls.
8951 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
8952#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
8953# define IEM_CIMPL_HIDDEN_ARGS 3
8954#else
8955# define IEM_CIMPL_HIDDEN_ARGS 2
8956#endif
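/* Illustration (an assumption about the worker signatures, not spelled out here):
   a CIMPL worker declared with IEM_CIMPL_DEF_1(iemCImplWorker, uint8_t, u8Arg) is
   called as (pVCpu, cbInstr, u8Arg), i.e. two hidden arguments precede the explicit
   ones; with VBOXSTRICTRC_STRICT_ENABLED on Windows/AMD64 the VBOXSTRICTRC return
   value travels via an extra hidden pointer parameter, hence the count of 3 above. */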
8957
8958#define IEM_MC_NOREF(a_Name) \
8959 RT_NOREF_PV(a_Name)
8960
8961#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
8962 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
8963
8964#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
8965 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
8966
8967#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
8968 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
8969
8970#define IEM_MC_LOCAL(a_Type, a_Name) \
8971 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
8972
8973#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
8974 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
8975
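/* Note that a statement like IEM_MC_LOCAL(uint16_t, u16Tmp) thus expands to
        uint8_t const u16Tmp = iemNativeVarAlloc(pReNative, sizeof(uint16_t));
   i.e. in the recompiler the MC 'variable' name holds a packed variable index
   rather than the value itself; the value lives in a host register and/or a
   stack slot managed by the iemNativeVarXxx functions below. */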
8976
8977/**
8978 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
8979 */
8980DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
8981{
8982 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
8983 return IEM_CIMPL_HIDDEN_ARGS;
8984 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
8985 return 1;
8986 return 0;
8987}
8988
8989
8990/**
8991 * Internal work that allocates a variable with kind set to
8992 * kIemNativeVarKind_Invalid and no current stack allocation.
8993 *
8994 * The kind will either be set by the caller or later when the variable is first
8995 * assigned a value.
8996 *
8997 * @returns Unpacked index.
8998 * @internal
8999 */
9000static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9001{
9002 Assert(cbType > 0 && cbType <= 64);
9003 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
9004 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
9005 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
9006 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9007 pReNative->Core.aVars[idxVar].cbVar = cbType;
9008 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9009 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9010 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
9011 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
9012 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
9013 pReNative->Core.aVars[idxVar].fRegAcquired = false;
9014 pReNative->Core.aVars[idxVar].u.uValue = 0;
9015 return idxVar;
9016}
9017
9018
9019/**
9020 * Internal work that allocates an argument variable w/o setting enmKind.
9021 *
9022 * @returns Unpacked index.
9023 * @internal
9024 */
9025static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9026{
9027 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
9028 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9029 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
9030
9031 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
9032 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
9033 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
9034 return idxVar;
9035}
9036
9037
9038/**
9039 * Gets the stack slot for a stack variable, allocating one if necessary.
9040 *
9041 * Calling this function implies that the stack slot will contain a valid
9042 * variable value. The caller deals with any register currently assigned to the
9043 * variable, typically by spilling it into the stack slot.
9044 *
9045 * @returns The stack slot number.
9046 * @param pReNative The recompiler state.
9047 * @param idxVar The variable.
9048 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
9049 */
9050DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9051{
9052 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9053 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9054 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9055
9056 /* Already got a slot? */
9057 uint8_t const idxStackSlot = pVar->idxStackSlot;
9058 if (idxStackSlot != UINT8_MAX)
9059 {
9060 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
9061 return idxStackSlot;
9062 }
9063
9064 /*
9065 * A single slot is easy to allocate.
9066 * Allocate them from the top end, closest to BP, to reduce the displacement.
9067 */
9068 if (pVar->cbVar <= sizeof(uint64_t))
9069 {
9070 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9071 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9072 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
9073 pVar->idxStackSlot = (uint8_t)iSlot;
9074        Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
9075 return (uint8_t)iSlot;
9076 }
9077
9078 /*
9079 * We need more than one stack slot.
9080 *
9081 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
9082 */
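    /* Worked example for cbVar=32: ASMBitLastSetU32(32) is 6, so fBitAlignMask is
       RT_BIT_32(2) - 1 = 3 (the first slot index must be a multiple of 4), and
       fBitAllocMask is RT_BIT_32(4) - 1 = 0xf, i.e. four consecutive 8-byte slots
       get claimed. */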
9083 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
9084 Assert(pVar->cbVar <= 64);
9085 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
9086 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
9087 uint32_t bmStack = ~pReNative->Core.bmStack;
9088 while (bmStack != UINT32_MAX)
9089 {
9090/** @todo allocate from the top to reduce BP displacement. */
9091 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
9092 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9093 if (!(iSlot & fBitAlignMask))
9094 {
9095 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
9096 {
9097 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
9098 pVar->idxStackSlot = (uint8_t)iSlot;
9099                Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9100 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
9101 return (uint8_t)iSlot;
9102 }
9103 }
9104 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
9105 }
9106 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9107}
9108
9109
9110/**
9111 * Changes the variable to a stack variable.
9112 *
9113 * Currently this is only possible to do the first time the variable is used;
9114 * switching later can be implemented but hasn't been done.
9115 *
9116 * @param pReNative The recompiler state.
9117 * @param idxVar The variable.
9118 * @throws VERR_IEM_VAR_IPE_2
9119 */
9120static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9121{
9122 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9123 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9124 if (pVar->enmKind != kIemNativeVarKind_Stack)
9125 {
9126 /* We could in theory transition from immediate to stack as well, but it
9127 would involve the caller doing work storing the value on the stack. So,
9128 till that's required we only allow transition from invalid. */
9129 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9130 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9131 pVar->enmKind = kIemNativeVarKind_Stack;
9132
9133 /* Note! We don't allocate a stack slot here, that's only done when a
9134 slot is actually needed to hold a variable value. */
9135 }
9136}
9137
9138
9139/**
9140 * Sets it to a variable with a constant value.
9141 *
9142 * This does not require stack storage as we know the value and can always
9143 * reload it, unless of course it's referenced.
9144 *
9145 * @param pReNative The recompiler state.
9146 * @param idxVar The variable.
9147 * @param uValue The immediate value.
9148 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9149 */
9150static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
9151{
9152 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9153 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9154 if (pVar->enmKind != kIemNativeVarKind_Immediate)
9155 {
9156 /* Only simple transitions for now. */
9157 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9158 pVar->enmKind = kIemNativeVarKind_Immediate;
9159 }
9160 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9161
9162 pVar->u.uValue = uValue;
9163 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
9164 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
9165 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
9166}
9167
9168
9169/**
9170 * Sets the variable to a reference (pointer) to @a idxOtherVar.
9171 *
9172 * This does not require stack storage as we know the value and can always
9173 * reload it. Loading is postponed till needed.
9174 *
9175 * @param pReNative The recompiler state.
9176 * @param idxVar The variable. Unpacked.
9177 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
9178 *
9179 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9180 * @internal
9181 */
9182static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
9183{
9184 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
9185 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
9186
9187 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
9188 {
9189 /* Only simple transitions for now. */
9190 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
9191 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9192 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
9193 }
9194 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9195
9196 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
9197
9198 /* Update the other variable, ensure it's a stack variable. */
9199 /** @todo handle variables with const values... that'll go boom now. */
9200 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
9201 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9202}
9203
9204
9205/**
9206 * Sets the variable to a reference (pointer) to a guest register reference.
9207 *
9208 * This does not require stack storage as we know the value and can always
9209 * reload it. Loading is postponed till needed.
9210 *
9211 * @param pReNative The recompiler state.
9212 * @param idxVar The variable.
9213 * @param enmRegClass The class guest registers to reference.
9214 * @param idxReg The register within @a enmRegClass to reference.
9215 *
9216 * @throws VERR_IEM_VAR_IPE_2
9217 */
9218static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9219 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
9220{
9221 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9222 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9223
9224 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
9225 {
9226 /* Only simple transitions for now. */
9227 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9228 pVar->enmKind = kIemNativeVarKind_GstRegRef;
9229 }
9230 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9231
9232 pVar->u.GstRegRef.enmClass = enmRegClass;
9233 pVar->u.GstRegRef.idx = idxReg;
9234}
9235
9236
9237DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9238{
9239 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9240}
9241
9242
9243DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
9244{
9245 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9246
9247    /* Since we're using a generic uint64_t value type, we must truncate it if
9248       the variable is smaller, otherwise we may end up with too large a value when
9249       scaling up an imm8 w/ sign-extension.
9250
9251 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
9252       in the bios, bx=1) when running on arm, because clang expects 16-bit
9253 register parameters to have bits 16 and up set to zero. Instead of
9254 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
9255 CF value in the result. */
9256 switch (cbType)
9257 {
9258 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9259 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9260 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9261 }
9262 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9263 return idxVar;
9264}
9265
9266
9267DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
9268{
9269 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
9270 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
9271 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
9272 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
9273 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
9274 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9275
9276 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
9277 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
9278 return idxArgVar;
9279}
9280
9281
9282DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9283{
9284 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9285 /* Don't set to stack now, leave that to the first use as for instance
9286 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
9287 return idxVar;
9288}
9289
9290
9291DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
9292{
9293 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9294
9295    /* Since we're using a generic uint64_t value type, we must truncate it if
9296       the variable is smaller, otherwise we may end up with too large a value when
9297       scaling up an imm8 w/ sign-extension. */
9298 switch (cbType)
9299 {
9300 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9301 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9302 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9303 }
9304 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9305 return idxVar;
9306}
9307
9308
9309/**
9310 * Makes sure variable @a idxVar has a register assigned to it and that it stays
9311 * fixed till we call iemNativeVarRegisterRelease.
9312 *
9313 * @returns The host register number.
9314 * @param pReNative The recompiler state.
9315 * @param idxVar The variable.
9316 * @param poff Pointer to the instruction buffer offset.
9317 * In case a register needs to be freed up or the value
9318 * loaded off the stack.
9319 * @param fInitialized Set if the variable must already have been initialized.
9320 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
9321 * the case.
9322 * @param idxRegPref Preferred register number or UINT8_MAX.
9323 */
9324DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
9325 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
9326{
9327 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9328 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9329 Assert(pVar->cbVar <= 8);
9330 Assert(!pVar->fRegAcquired);
9331
9332 uint8_t idxReg = pVar->idxReg;
9333 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9334 {
9335 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
9336 && pVar->enmKind < kIemNativeVarKind_End);
9337 pVar->fRegAcquired = true;
9338 return idxReg;
9339 }
9340
9341 /*
9342 * If the kind of variable has not yet been set, default to 'stack'.
9343 */
9344 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
9345 && pVar->enmKind < kIemNativeVarKind_End);
9346 if (pVar->enmKind == kIemNativeVarKind_Invalid)
9347 iemNativeVarSetKindToStack(pReNative, idxVar);
9348
9349 /*
9350     * We have to allocate a register for the variable, even if it's a stack one,
9351     * as we don't know whether there are modifications being made to it before
9352     * it's finalized (todo: analyze and insert hints about that?).
9353     *
9354     * If we can, we try to get the correct register for argument variables.  This
9355     * is assuming that most argument variables are fetched as close as possible
9356     * to the actual call, so that there aren't any interfering hidden calls
9357     * (memory accesses, etc.) in between.
9358     *
9359     * If we cannot, or it's a local variable, we make sure no argument registers
9360     * that will be used by this MC block are allocated here, and we always
9361     * prefer non-volatile registers to avoid needing to spill stuff for internal
9362     * calls.
9363 */
9364 /** @todo Detect too early argument value fetches and warn about hidden
9365 * calls causing less optimal code to be generated in the python script. */
9366
9367 uint8_t const uArgNo = pVar->uArgNo;
9368 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
9369 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
9370 {
9371 idxReg = g_aidxIemNativeCallRegs[uArgNo];
9372 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9373 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
9374 }
9375 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
9376 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
9377 {
9378 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
9379 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
9380 & ~pReNative->Core.bmHstRegsWithGstShadow
9381 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
9382 & fNotArgsMask;
9383 if (fRegs)
9384 {
9385            /* Pick from the top as both arm64 and amd64 have a block of non-volatile registers there. */
9386 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
9387 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
9388 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
9389 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
9390 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9391 }
9392 else
9393 {
9394 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
9395 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
9396 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
9397 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9398 }
9399 }
9400 else
9401 {
9402 idxReg = idxRegPref;
9403 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9404 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
9405 }
9406 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9407 pVar->idxReg = idxReg;
9408
9409 /*
9410 * Load it off the stack if we've got a stack slot.
9411 */
9412 uint8_t const idxStackSlot = pVar->idxStackSlot;
9413 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9414 {
9415 Assert(fInitialized);
9416 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9417 switch (pVar->cbVar)
9418 {
9419 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
9420 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
9421 case 3: AssertFailed(); RT_FALL_THRU();
9422 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
9423 default: AssertFailed(); RT_FALL_THRU();
9424 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
9425 }
9426 }
9427 else
9428 {
9429 Assert(idxStackSlot == UINT8_MAX);
9430 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9431 }
9432 pVar->fRegAcquired = true;
9433 return idxReg;
9434}
9435
9436
9437/**
9438 * The value of variable @a idxVar will be written in full to the @a enmGstReg
9439 * guest register.
9440 *
9441 * This function makes sure there is a register for it and sets it to be the
9442 * current shadow copy of @a enmGstReg.
9443 *
9444 * @returns The host register number.
9445 * @param pReNative The recompiler state.
9446 * @param idxVar The variable.
9447 * @param enmGstReg The guest register this variable will be written to
9448 * after this call.
9449 * @param poff Pointer to the instruction buffer offset.
9450 * In case a register needs to be freed up or if the
9451 * variable content needs to be loaded off the stack.
9452 *
9453 * @note    We DO NOT expect @a idxVar to be an argument variable, because
9454 *          this function can only be used in the commit stage of an
9455 *          instruction.
9456 */
9457DECL_HIDDEN_THROW(uint8_t)
9458iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
9459{
9460 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9461 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9462 Assert(!pVar->fRegAcquired);
9463 AssertMsgStmt( pVar->cbVar <= 8
9464 && ( pVar->enmKind == kIemNativeVarKind_Immediate
9465 || pVar->enmKind == kIemNativeVarKind_Stack),
9466 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
9467 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
9468 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9469
9470 /*
9471 * This shouldn't ever be used for arguments, unless it's in a weird else
9472 * branch that doesn't do any calling and even then it's questionable.
9473 *
9474 * However, in case someone writes crazy wrong MC code and does register
9475 * updates before making calls, just use the regular register allocator to
9476 * ensure we get a register suitable for the intended argument number.
9477 */
9478 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
9479
9480 /*
9481 * If there is already a register for the variable, we transfer/set the
9482 * guest shadow copy assignment to it.
9483 */
9484 uint8_t idxReg = pVar->idxReg;
9485 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9486 {
9487 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
9488 {
9489 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
9490 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
9491 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
9492 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
9493 }
9494 else
9495 {
9496 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
9497 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
9498 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
9499 }
9500 /** @todo figure this one out. We need some way of making sure the register isn't
9501 * modified after this point, just in case we start writing crappy MC code. */
9502 pVar->enmGstReg = enmGstReg;
9503 pVar->fRegAcquired = true;
9504 return idxReg;
9505 }
9506 Assert(pVar->uArgNo == UINT8_MAX);
9507
9508 /*
9509     * Because this is supposed to be the commit stage, we just tag along with the
9510 * temporary register allocator and upgrade it to a variable register.
9511 */
9512 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
9513 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
9514 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
9515 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
9516 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
9517 pVar->idxReg = idxReg;
9518
9519 /*
9520 * Now we need to load the register value.
9521 */
9522 if (pVar->enmKind == kIemNativeVarKind_Immediate)
9523 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
9524 else
9525 {
9526 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9527 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9528 switch (pVar->cbVar)
9529 {
9530 case sizeof(uint64_t):
9531 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
9532 break;
9533 case sizeof(uint32_t):
9534 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
9535 break;
9536 case sizeof(uint16_t):
9537 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
9538 break;
9539 case sizeof(uint8_t):
9540 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
9541 break;
9542 default:
9543 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9544 }
9545 }
9546
9547 pVar->fRegAcquired = true;
9548 return idxReg;
9549}
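
/*
 * Note: Sketch of how a full-write emitter (e.g. an IEM_MC_STORE_GREG_U64 style
 *       one) is expected to use iemNativeVarRegisterAcquireForGuestReg.  The
 *       function below is made up for illustration and the actual CPUMCTX store
 *       helper is deliberately left out of the sketch.
 */
#if 0
DECL_INLINE_THROW(uint32_t)
iemNativeEmitStoreGregU64Sketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
{
    Assert(iGReg < 16);
    /* Make the value variable's host register the new shadow copy of the guest
       GPR that is about to be overwritten in full. */
    uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
                                                                     IEMNATIVEGSTREG_GPR(iGReg), &off);
    /* ... emit the store of idxVarReg into CPUMCTX.aGRegs[iGReg] here ... */
    iemNativeVarRegisterRelease(pReNative, idxValueVar);
    return off;
}
#endif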
9550
9551
9552/**
9553 * Sets the host register for @a idxVar to @a idxReg.
9554 *
9555 * The register must not be allocated. Any guest register shadowing will be
9556 * implicitly dropped by this call.
9557 *
9558 * The variable must not have any register associated with it (causes
9559 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
9560 * implied.
9561 *
9562 * @returns idxReg
9563 * @param pReNative The recompiler state.
9564 * @param idxVar The variable.
9565 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
9566 * @param off For recording in debug info.
9567 *
9568 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
9569 */
9570DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
9571{
9572 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9573 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9574 Assert(!pVar->fRegAcquired);
9575 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
9576 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
9577 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
9578
9579 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
9580 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9581
9582 iemNativeVarSetKindToStack(pReNative, idxVar);
9583 pVar->idxReg = idxReg;
9584
9585 return idxReg;
9586}
9587
9588
9589/**
9590 * A convenience wrapper around iemNativeVarRegisterSet that also marks the register as acquired.
9591 */
9592DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9593 uint8_t idxReg, uint32_t *poff)
9594{
9595 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
9596 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
9597 return idxReg;
9598}
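
/*
 * Note: Minimal sketch of where iemNativeVarRegisterSetAndAcquire fits in;
 *       pfnSomeHelper is a placeholder for whatever helper was just called.
 */
#if 0
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper); /* placeholder helper */
    /* Bind the volatile return register to the result variable and keep it
       acquired so the following emits can use it directly. */
    uint8_t const idxRegRc = iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, &off);
    /* ... emit code consuming idxRegRc ... */
    iemNativeVarRegisterRelease(pReNative, idxVarRc);
#endif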
9599
9600
9601/**
9602 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
9603 *
9604 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
9605 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
9606 * requirement of flushing anything in volatile host registers when making a
9607 * call.
9608 *
9609 * @returns New @a off value.
9610 * @param pReNative The recompiler state.
9611 * @param off The code buffer position.
9612 * @param fHstRegsNotToSave Set of registers not to save & restore.
9613 */
9614DECL_HIDDEN_THROW(uint32_t)
9615iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9616{
9617 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9618 if (fHstRegs)
9619 {
9620 do
9621 {
9622 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9623 fHstRegs &= ~RT_BIT_32(idxHstReg);
9624
9625 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9626 {
9627 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9628 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9629 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9630 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9631 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9632 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9633 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9634 {
9635 case kIemNativeVarKind_Stack:
9636 {
9637 /* Temporarily spill the variable register. */
9638 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9639 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9640 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9641 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9642 continue;
9643 }
9644
9645 case kIemNativeVarKind_Immediate:
9646 case kIemNativeVarKind_VarRef:
9647 case kIemNativeVarKind_GstRegRef:
9648 /* It is weird to have any of these loaded at this point. */
9649 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9650 continue;
9651
9652 case kIemNativeVarKind_End:
9653 case kIemNativeVarKind_Invalid:
9654 break;
9655 }
9656 AssertFailed();
9657 }
9658 else
9659 {
9660 /*
9661 * Allocate a temporary stack slot and spill the register to it.
9662 */
9663 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9664 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
9665 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9666 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
9667 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
9668 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9669 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9670 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9671 }
9672 } while (fHstRegs);
9673 }
9674 return off;
9675}
9676
9677
9678/**
9679 * Emit code to restore volatile registers after a call to a helper.
9680 *
9681 * @returns New @a off value.
9682 * @param pReNative The recompiler state.
9683 * @param off The code buffer position.
9684 * @param fHstRegsNotToSave Set of registers not to save & restore.
9685 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
9686 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
9687 */
9688DECL_HIDDEN_THROW(uint32_t)
9689iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9690{
9691 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9692 if (fHstRegs)
9693 {
9694 do
9695 {
9696 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9697 fHstRegs &= ~RT_BIT_32(idxHstReg);
9698
9699 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9700 {
9701 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9702 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9703 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9704 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9705 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9706 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9707 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9708 {
9709 case kIemNativeVarKind_Stack:
9710 {
9711 /* Unspill the variable register. */
9712 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9713 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
9714 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9715 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9716 continue;
9717 }
9718
9719 case kIemNativeVarKind_Immediate:
9720 case kIemNativeVarKind_VarRef:
9721 case kIemNativeVarKind_GstRegRef:
9722 /* It is weird to have any of these loaded at this point. */
9723 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9724 continue;
9725
9726 case kIemNativeVarKind_End:
9727 case kIemNativeVarKind_Invalid:
9728 break;
9729 }
9730 AssertFailed();
9731 }
9732 else
9733 {
9734 /*
9735 * Restore from temporary stack slot.
9736 */
9737 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
9738 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
9739 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
9740 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
9741
9742 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9743 }
9744 } while (fHstRegs);
9745 }
9746 return off;
9747}
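
/*
 * Note: The intended bracketing of a TLB miss helper call using the pair of
 *       functions above (see their docs); pfnTlbMissHelper and the
 *       fHstRegsNotToSave mask are placeholders for this sketch.
 */
#if 0
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    /* ... load the helper arguments ... */
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnTlbMissHelper);
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
#endif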
9748
9749
9750/**
9751 * Worker that frees the stack slots for variable @a idxVar if any are allocated.
9752 *
9753 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
9754 *
9755 * ASSUMES that @a idxVar is valid and unpacked.
9756 */
9757DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9758{
9759 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
9760 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
9761 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9762 {
9763 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
9764 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
9765 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
9766 Assert(cSlots > 0);
9767 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
9768 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9769 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
9770 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
9771 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9772 }
9773 else
9774 Assert(idxStackSlot == UINT8_MAX);
9775}
9776
9777
9778/**
9779 * Worker that frees a single variable.
9780 *
9781 * ASSUMES that @a idxVar is valid and unpacked.
9782 */
9783DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9784{
9785 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
9786 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
9787 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
9788
9789 /* Free the host register first if any assigned. */
9790 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9791 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9792 {
9793 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9794 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9795 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9796 }
9797
9798 /* Free argument mapping. */
9799 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
9800 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
9801 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
9802
9803 /* Free the stack slots. */
9804 iemNativeVarFreeStackSlots(pReNative, idxVar);
9805
9806 /* Free the actual variable. */
9807 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9808 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9809}
9810
9811
9812/**
9813 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
9814 */
9815DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
9816{
9817 while (bmVars != 0)
9818 {
9819 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9820 bmVars &= ~RT_BIT_32(idxVar);
9821
9822#if 1 /** @todo optimize by simplifying this later... */
9823 iemNativeVarFreeOneWorker(pReNative, idxVar);
9824#else
9825 /* Only need to free the host register, the rest is done as bulk updates below. */
9826 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9827 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9828 {
9829 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9830 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9831 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9832 }
9833#endif
9834 }
9835#if 0 /** @todo optimize by simplifying this later... */
9836 pReNative->Core.bmVars = 0;
9837 pReNative->Core.bmStack = 0;
9838 pReNative->Core.u64ArgVars = UINT64_MAX;
9839#endif
9840}
9841
9842
9843/**
9844 * This is called by IEM_MC_END() to clean up all variables.
9845 */
9846DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
9847{
9848 uint32_t const bmVars = pReNative->Core.bmVars;
9849 if (bmVars != 0)
9850 iemNativeVarFreeAllSlow(pReNative, bmVars);
9851 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9852 Assert(pReNative->Core.bmStack == 0);
9853}
9854
9855
9856#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
9857
9858/**
9859 * This is called by IEM_MC_FREE_LOCAL.
9860 */
9861DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9862{
9863 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9864 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
9865 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9866}
9867
9868
9869#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
9870
9871/**
9872 * This is called by IEM_MC_FREE_ARG.
9873 */
9874DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9875{
9876 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9877 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
9878 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9879}
9880
9881
9882#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
9883
9884/**
9885 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
9886 */
9887DECL_INLINE_THROW(uint32_t)
9888iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
9889{
9890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
9891 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
9892 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9893 Assert( pVarDst->cbVar == sizeof(uint16_t)
9894 || pVarDst->cbVar == sizeof(uint32_t));
9895
9896 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
9897 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
9898 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
9899 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
9900 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9901
9902 Assert(pVarDst->cbVar < pVarSrc->cbVar);
9903
9904 /*
9905 * Special case for immediates.
9906 */
9907 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
9908 {
9909 switch (pVarDst->cbVar)
9910 {
9911 case sizeof(uint16_t):
9912 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
9913 break;
9914 case sizeof(uint32_t):
9915 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
9916 break;
9917 default: AssertFailed(); break;
9918 }
9919 }
9920 else
9921 {
9922 /*
9923 * The generic solution for now.
9924 */
9925 /** @todo optimize this by having the python script make sure the source
9926 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
9927 * statement. Then we could just transfer the register assignments. */
9928 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
9929 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
9930 switch (pVarDst->cbVar)
9931 {
9932 case sizeof(uint16_t):
9933 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
9934 break;
9935 case sizeof(uint32_t):
9936 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
9937 break;
9938 default: AssertFailed(); break;
9939 }
9940 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
9941 iemNativeVarRegisterRelease(pReNative, idxVarDst);
9942 }
9943 return off;
9944}
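
/*
 * Note: Hypothetical MC-block fragment showing where IEM_MC_ASSIGN_TO_SMALLER
 *       comes into play: truncating a 32-bit register fetch to a 16-bit local.
 */
#if 0
    IEM_MC_LOCAL(uint32_t, u32Src);
    IEM_MC_FETCH_GREG_U32(u32Src, X86_GREG_xAX);
    IEM_MC_LOCAL(uint16_t, u16Dst);
    IEM_MC_ASSIGN_TO_SMALLER(u16Dst, u32Src);
#endif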
9945
9946
9947
9948/*********************************************************************************************************************************
9949* Emitters for IEM_MC_CALL_CIMPL_XXX *
9950*********************************************************************************************************************************/
9951
9952/**
9953 * Emits code to load a reference to the given guest register into @a idxGprDst.
9954 */
9955DECL_INLINE_THROW(uint32_t)
9956iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
9957 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
9958{
9959#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9960 /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
9961#endif
9962
9963 /*
9964 * Get the offset relative to the CPUMCTX structure.
9965 */
9966 uint32_t offCpumCtx;
9967 switch (enmClass)
9968 {
9969 case kIemNativeGstRegRef_Gpr:
9970 Assert(idxRegInClass < 16);
9971 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
9972 break;
9973
9974 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
9975 Assert(idxRegInClass < 4);
9976 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
9977 break;
9978
9979 case kIemNativeGstRegRef_EFlags:
9980 Assert(idxRegInClass == 0);
9981 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
9982 break;
9983
9984 case kIemNativeGstRegRef_MxCsr:
9985 Assert(idxRegInClass == 0);
9986 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
9987 break;
9988
9989 case kIemNativeGstRegRef_FpuReg:
9990 Assert(idxRegInClass < 8);
9991 AssertFailed(); /** @todo what kind of indexing? */
9992 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
9993 break;
9994
9995 case kIemNativeGstRegRef_MReg:
9996 Assert(idxRegInClass < 8);
9997 AssertFailed(); /** @todo what kind of indexing? */
9998 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
9999 break;
10000
10001 case kIemNativeGstRegRef_XReg:
10002 Assert(idxRegInClass < 16);
10003 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
10004 break;
10005
10006 default:
10007 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
10008 }
10009
10010 /*
10011 * Load the address of the guest register into the destination register.
10012 */
10013#ifdef RT_ARCH_AMD64
10014 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
10015
10016#elif defined(RT_ARCH_ARM64)
10017 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10018 Assert(offCpumCtx < 4096);
10019 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
10020
10021#else
10022# error "Port me!"
10023#endif
10024
10025 return off;
10026}
10027
10028
10029/**
10030 * Common code for CIMPL and AIMPL calls.
10031 *
10032 * These are calls that use argument variables and such. They should not be
10033 * confused with internal calls required to implement an MC operation,
10034 * like a TLB load and similar.
10035 *
10036 * Upon return all that is left to do is to load any hidden arguments and
10037 * perform the call. All argument variables are freed.
10038 *
10039 * @returns New code buffer offset; throws VBox status code on error.
10040 * @param pReNative The native recompile state.
10041 * @param off The code buffer offset.
10042 * @param cArgs The total number of arguments (includes hidden
10043 * count).
10044 * @param cHiddenArgs The number of hidden arguments. The hidden
10045 * arguments must not have any variable declared for
10046 * them, whereas all the regular arguments must
10047 * (tstIEMCheckMc ensures this).
10048 */
10049DECL_HIDDEN_THROW(uint32_t)
10050iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
10051{
10052#ifdef VBOX_STRICT
10053 /*
10054 * Assert sanity.
10055 */
10056 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
10057 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
10058 for (unsigned i = 0; i < cHiddenArgs; i++)
10059 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
10060 for (unsigned i = cHiddenArgs; i < cArgs; i++)
10061 {
10062 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
10063 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
10064 }
10065 iemNativeRegAssertSanity(pReNative);
10066#endif
10067
10068 /* We don't know what the called function makes use of, so flush any pending register writes. */
10069 off = iemNativeRegFlushPendingWrites(pReNative, off);
10070
10071 /*
10072 * Before we do anything else, go over variables that are referenced and
10073 * make sure they are not in a register.
10074 */
10075 uint32_t bmVars = pReNative->Core.bmVars;
10076 if (bmVars)
10077 {
10078 do
10079 {
10080 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
10081 bmVars &= ~RT_BIT_32(idxVar);
10082
10083 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
10084 {
10085 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
10086 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
10087 {
10088 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
10089 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
10090 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
10091 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
10092 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
10093
10094 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
10095 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
10096 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
10097 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
10098 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
10099 }
10100 }
10101 } while (bmVars != 0);
10102#if 0 //def VBOX_STRICT
10103 iemNativeRegAssertSanity(pReNative);
10104#endif
10105 }
10106
10107 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
10108
10109 /*
10110 * First, go over the host registers that will be used for arguments and make
10111 * sure they either hold the desired argument or are free.
10112 */
10113 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
10114 {
10115 for (uint32_t i = 0; i < cRegArgs; i++)
10116 {
10117 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10118 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10119 {
10120 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
10121 {
10122 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
10123 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
10124 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
10125 Assert(pVar->idxReg == idxArgReg);
10126 uint8_t const uArgNo = pVar->uArgNo;
10127 if (uArgNo == i)
10128 { /* perfect */ }
10129 /* The variable allocator logic should make sure this is impossible,
10130 except for when the return register is used as a parameter (ARM,
10131 but not x86). */
10132#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
10133 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
10134 {
10135# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10136# error "Implement this"
10137# endif
10138 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
10139 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
10140 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
10141 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10142 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
10143 }
10144#endif
10145 else
10146 {
10147 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10148
10149 if (pVar->enmKind == kIemNativeVarKind_Stack)
10150 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
10151 else
10152 {
10153 /* just free it, can be reloaded if used again */
10154 pVar->idxReg = UINT8_MAX;
10155 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
10156 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
10157 }
10158 }
10159 }
10160 else
10161 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
10162 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
10163 }
10164 }
10165#if 0 //def VBOX_STRICT
10166 iemNativeRegAssertSanity(pReNative);
10167#endif
10168 }
10169
10170 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
10171
10172#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10173 /*
10174 * If there are any stack arguments, make sure they are in their place as well.
10175 *
10176 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
10177 * the caller) will be loading it later and it must be free (see the first loop).
10178 */
10179 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
10180 {
10181 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
10182 {
10183 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10184 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
10185 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10186 {
10187 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
10188 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
10189 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
10190 pVar->idxReg = UINT8_MAX;
10191 }
10192 else
10193 {
10194 /* Use ARG0 as temp for stuff we need registers for. */
10195 switch (pVar->enmKind)
10196 {
10197 case kIemNativeVarKind_Stack:
10198 {
10199 uint8_t const idxStackSlot = pVar->idxStackSlot;
10200 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10201 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
10202 iemNativeStackCalcBpDisp(idxStackSlot));
10203 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10204 continue;
10205 }
10206
10207 case kIemNativeVarKind_Immediate:
10208 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
10209 continue;
10210
10211 case kIemNativeVarKind_VarRef:
10212 {
10213 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10214 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10215 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10216 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10217 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10218 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10219 {
10220 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10221 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10222 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10223 }
10224 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10225 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10226 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
10227 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10228 continue;
10229 }
10230
10231 case kIemNativeVarKind_GstRegRef:
10232 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
10233 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10234 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10235 continue;
10236
10237 case kIemNativeVarKind_Invalid:
10238 case kIemNativeVarKind_End:
10239 break;
10240 }
10241 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10242 }
10243 }
10244# if 0 //def VBOX_STRICT
10245 iemNativeRegAssertSanity(pReNative);
10246# endif
10247 }
10248#else
10249 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
10250#endif
10251
10252 /*
10253 * Make sure the argument variables are loaded into their respective registers.
10254 *
10255 * We can optimize this by ASSUMING that any register allocations are for
10256 * registers that have already been loaded and are ready. The previous step
10257 * saw to that.
10258 */
10259 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
10260 {
10261 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10262 {
10263 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10264 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10265 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
10266 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
10267 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
10268 else
10269 {
10270 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10271 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10272 {
10273 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
10274 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
10275 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
10276 | RT_BIT_32(idxArgReg);
10277 pVar->idxReg = idxArgReg;
10278 }
10279 else
10280 {
10281 /* Use ARG0 as temp for stuff we need registers for. */
10282 switch (pVar->enmKind)
10283 {
10284 case kIemNativeVarKind_Stack:
10285 {
10286 uint8_t const idxStackSlot = pVar->idxStackSlot;
10287 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10288 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
10289 continue;
10290 }
10291
10292 case kIemNativeVarKind_Immediate:
10293 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
10294 continue;
10295
10296 case kIemNativeVarKind_VarRef:
10297 {
10298 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10299 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10300 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
10301 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10302 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10303 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10304 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10305 {
10306 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10307 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10308 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10309 }
10310 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10311 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10312 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
10313 continue;
10314 }
10315
10316 case kIemNativeVarKind_GstRegRef:
10317 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
10318 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10319 continue;
10320
10321 case kIemNativeVarKind_Invalid:
10322 case kIemNativeVarKind_End:
10323 break;
10324 }
10325 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10326 }
10327 }
10328 }
10329#if 0 //def VBOX_STRICT
10330 iemNativeRegAssertSanity(pReNative);
10331#endif
10332 }
10333#ifdef VBOX_STRICT
10334 else
10335 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10336 {
10337 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
10338 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
10339 }
10340#endif
10341
10342 /*
10343 * Free all argument variables (simplified).
10344 * Their lifetime always expires with the call they are for.
10345 */
10346 /** @todo Make the python script check that arguments aren't used after
10347 * IEM_MC_CALL_XXXX. */
10348 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
10349 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
10350 * an argument value. There is also some FPU stuff. */
10351 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
10352 {
10353 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
10354 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
10355
10356 /* no need to free registers: */
10357 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
10358 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
10359 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
10360 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
10361 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
10362 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
10363
10364 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
10365 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
10366 iemNativeVarFreeStackSlots(pReNative, idxVar);
10367 }
10368 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
10369
10370 /*
10371 * Flush volatile registers as we make the call.
10372 */
10373 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
10374
10375 return off;
10376}
10377
10378
10379/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
10380DECL_HIDDEN_THROW(uint32_t)
10381iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
10382 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
10383
10384{
10385 /*
10386 * Do all the call setup and cleanup.
10387 */
10388 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
10389
10390 /*
10391 * Load the two or three hidden arguments.
10392 */
10393#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10394 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
10395 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10396 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
10397#else
10398 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10399 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
10400#endif
10401
10402 /*
10403 * Make the call and check the return code.
10404 *
10405 * Shadow PC copies are always flushed here; other stuff depends on flags.
10406 * Segment and general purpose registers are explicitly flushed via the
10407 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
10408 * macros.
10409 */
10410 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
10411#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10412 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
10413#endif
10414 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
10415 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
10416 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
10417 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
10418
10419 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
10420}
10421
10422
10423#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
10424 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
10425
10426/** Emits code for IEM_MC_CALL_CIMPL_1. */
10427DECL_INLINE_THROW(uint32_t)
10428iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10429 uintptr_t pfnCImpl, uint8_t idxArg0)
10430{
10431 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10432 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
10433}
10434
10435
10436#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
10437 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
10438
10439/** Emits code for IEM_MC_CALL_CIMPL_2. */
10440DECL_INLINE_THROW(uint32_t)
10441iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10442 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
10443{
10444 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10445 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10446 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
10447}
10448
10449
10450#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
10451 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10452 (uintptr_t)a_pfnCImpl, a0, a1, a2)
10453
10454/** Emits code for IEM_MC_CALL_CIMPL_3. */
10455DECL_INLINE_THROW(uint32_t)
10456iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10457 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10458{
10459 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10460 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10461 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10462 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
10463}
10464
10465
10466#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
10467 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10468 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
10469
10470/** Emits code for IEM_MC_CALL_CIMPL_4. */
10471DECL_INLINE_THROW(uint32_t)
10472iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10473 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10474{
10475 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10476 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10477 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10478 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10479 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
10480}
10481
10482
10483#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
10484 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10485 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
10486
10487/** Emits code for IEM_MC_CALL_CIMPL_5. */
10488DECL_INLINE_THROW(uint32_t)
10489iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10490 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
10491{
10492 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10493 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10494 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10495 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10496 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
10497 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
10498}
10499
10500
10501/** Recompiler debugging: Flush guest register shadow copies. */
10502#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
10503
10504
10505
10506/*********************************************************************************************************************************
10507* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
10508*********************************************************************************************************************************/
10509
10510/**
10511 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
10512 */
10513DECL_INLINE_THROW(uint32_t)
10514iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10515 uintptr_t pfnAImpl, uint8_t cArgs)
10516{
10517 if (idxVarRc != UINT8_MAX)
10518 {
10519 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
10520 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
10521 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
10522 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
10523 }
10524
10525 /*
10526 * Do all the call setup and cleanup.
10527 */
10528 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
10529
10530 /*
10531 * Make the call and update the return code variable if we've got one.
10532 */
10533 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10534 if (idxVarRc != UINT8_MAX)
10535 {
10536 off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
10537 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
10538 }
10539
10540 return off;
10541}
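
/*
 * Note: Hypothetical MC-block fragment for an AIMPL call with a return value;
 *       the worker pointer (pfnAImplWorker) and the argument layout are made up
 *       for illustration.
 */
#if 0
    IEM_MC_ARG(uint32_t *, pu32Dst, 0);
    IEM_MC_ARG(uint32_t,   u32Src,  1);
    IEM_MC_LOCAL(int32_t,  rc);
    IEM_MC_CALL_AIMPL_2(rc, pfnAImplWorker, pu32Dst, u32Src);
#endif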
10542
10543
10544
10545#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
10546 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
10547
10548#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
10549 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
10550
10551/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
10552DECL_INLINE_THROW(uint32_t)
10553iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
10554{
10555 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
10556}
10557
10558
10559#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
10560 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
10561
10562#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
10563 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
10564
10565/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
10566DECL_INLINE_THROW(uint32_t)
10567iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
10568{
10569 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10570 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
10571}
10572
10573
10574#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
10575 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
10576
10577#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
10578 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
10579
10580/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
10581DECL_INLINE_THROW(uint32_t)
10582iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10583 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10584{
10585 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10586 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10587 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
10588}
10589
10590
10591#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
10592 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
10593
10594#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
10595 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
10596
10597/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
10598DECL_INLINE_THROW(uint32_t)
10599iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10600 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10601{
10602 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10603 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10604 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10605 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
10606}
10607
10608
10609#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
10610 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10611
10612#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
10613 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10614
10615/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
10616DECL_INLINE_THROW(uint32_t)
10617iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10618 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10619{
10620 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10621 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10622 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10623 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
10624 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
10625}
10626
10627
10628
10629/*********************************************************************************************************************************
10630* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
10631*********************************************************************************************************************************/
10632
10633#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
10634 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
10635
10636#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10637 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
10638
10639#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10640 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
10641
10642#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10643 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
10644
10645
10646/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
10647 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
10648DECL_INLINE_THROW(uint32_t)
10649iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
10650{
10651 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10652 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10653 Assert(iGRegEx < 20);
10654
10655 /* Same discussion as in iemNativeEmitFetchGregU16 */
10656 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10657 kIemNativeGstRegUse_ReadOnly);
10658
10659 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10660 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10661
10662 /* The value is zero-extended to the full 64-bit host register width. */
10663 if (iGRegEx < 16)
10664 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10665 else
10666 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10667
10668 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10669 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10670 return off;
10671}
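
/*
 * Note: The extended 8-bit register encoding used by the *_THREADED variants:
 *       0..15 select the low byte of GPR 0..15, while 16..19 select the legacy
 *       high bytes AH, CH, DH and BH of GPR 0..3, hence the iGRegEx & 15 and
 *       iGRegEx < 16 logic above.  Illustrative calls (placeholder variables):
 */
#if 0
    off = iemNativeEmitFetchGregU8(pReNative, off, idxDstVar, X86_GREG_xAX,      sizeof(uint8_t)); /* AL */
    off = iemNativeEmitFetchGregU8(pReNative, off, idxDstVar, 16 + X86_GREG_xAX, sizeof(uint8_t)); /* AH */
#endif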
10672
10673
10674#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10675 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
10676
10677#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10678 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
10679
10680#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10681 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
10682
10683/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
10684DECL_INLINE_THROW(uint32_t)
10685iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
10686{
10687 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10688 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10689 Assert(iGRegEx < 20);
10690
10691 /* Same discussion as in iemNativeEmitFetchGregU16 */
10692 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10693 kIemNativeGstRegUse_ReadOnly);
10694
10695 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10696 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10697
10698 if (iGRegEx < 16)
10699 {
10700 switch (cbSignExtended)
10701 {
10702 case sizeof(uint16_t):
10703 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10704 break;
10705 case sizeof(uint32_t):
10706 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10707 break;
10708 case sizeof(uint64_t):
10709 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10710 break;
10711 default: AssertFailed(); break;
10712 }
10713 }
10714 else
10715 {
10716 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10717 switch (cbSignExtended)
10718 {
10719 case sizeof(uint16_t):
10720 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10721 break;
10722 case sizeof(uint32_t):
10723 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10724 break;
10725 case sizeof(uint64_t):
10726 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10727 break;
10728 default: AssertFailed(); break;
10729 }
10730 }
10731
10732 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10733 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10734 return off;
10735}
10736
10737
10738
10739#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
10740 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
10741
10742#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
10743 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10744
10745#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
10746 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10747
10748/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
10749DECL_INLINE_THROW(uint32_t)
10750iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10751{
10752 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10753 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10754 Assert(iGReg < 16);
10755
10756 /*
10757 * We can either just load the low 16 bits of the GPR into a host register
10758 * for the variable, or we can do so via a shadow copy host register. The
10759 * latter will avoid having to reload it if it's being stored later, but
10760 * will waste a host register if it isn't touched again. Since we don't
10761 * know what's going to happen, we choose the latter for now.
10762 */
10763 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10764 kIemNativeGstRegUse_ReadOnly);
10765
10766 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10767 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10768 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10769 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10770
10771 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10772 return off;
10773}
10774
10775
10776#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
10777 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10778
10779#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
10780 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10781
10782/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
10783DECL_INLINE_THROW(uint32_t)
10784iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
10785{
10786 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10787 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10788 Assert(iGReg < 16);
10789
10790 /*
10791 * We can either just load the low 16 bits of the GPR into a host register
10792 * for the variable, or we can do so via a shadow copy host register. The
10793 * latter will avoid having to reload it if it's being stored later, but
10794 * will waste a host register if it isn't touched again. Since we don't
10795     * know what's going to happen, we choose the latter for now.
10796 */
10797 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10798 kIemNativeGstRegUse_ReadOnly);
10799
10800 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10801 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10802 if (cbSignExtended == sizeof(uint32_t))
10803 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10804 else
10805 {
10806 Assert(cbSignExtended == sizeof(uint64_t));
10807 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10808 }
10809 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10810
10811 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10812 return off;
10813}
10814
10815
10816#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
10817 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
10818
10819#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
10820 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
10821
10822/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
10823DECL_INLINE_THROW(uint32_t)
10824iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10825{
10826 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10827 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10828 Assert(iGReg < 16);
10829
10830 /*
10831     * We can either just load the low 32 bits of the GPR into a host register
10832 * for the variable, or we can do so via a shadow copy host register. The
10833 * latter will avoid having to reload it if it's being stored later, but
10834 * will waste a host register if it isn't touched again. Since we don't
10835     * know what's going to happen, we choose the latter for now.
10836 */
10837 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10838 kIemNativeGstRegUse_ReadOnly);
10839
10840 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10841 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10842 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10843 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10844
10845 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10846 return off;
10847}
10848
10849
10850#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
10851 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
10852
10853/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
10854DECL_INLINE_THROW(uint32_t)
10855iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10856{
10857 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10858 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10859 Assert(iGReg < 16);
10860
10861 /*
10862     * We can either just load the low 32 bits of the GPR into a host register
10863 * for the variable, or we can do so via a shadow copy host register. The
10864 * latter will avoid having to reload it if it's being stored later, but
10865 * will waste a host register if it isn't touched again. Since we don't
10866     * know what's going to happen, we choose the latter for now.
10867 */
10868 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10869 kIemNativeGstRegUse_ReadOnly);
10870
10871 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10872 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10873 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10874 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10875
10876 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10877 return off;
10878}
10879
10880
10881#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
10882 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10883
10884#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
10885 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10886
10887/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
10888 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
10889DECL_INLINE_THROW(uint32_t)
10890iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10891{
10892 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10893 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10894 Assert(iGReg < 16);
10895
10896 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10897 kIemNativeGstRegUse_ReadOnly);
10898
10899 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10900 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10901 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
10902 /** @todo name the register a shadow one already? */
10903 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10904
10905 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10906 return off;
10907}
10908
10909
10910
10911/*********************************************************************************************************************************
10912* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
10913*********************************************************************************************************************************/
10914
10915#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
10916 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
10917
10918/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
10919DECL_INLINE_THROW(uint32_t)
10920iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
10921{
10922 Assert(iGRegEx < 20);
10923 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10924 kIemNativeGstRegUse_ForUpdate);
10925#ifdef RT_ARCH_AMD64
10926 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
10927
10928 /* To the lowest byte of the register: mov r8, imm8 */
10929 if (iGRegEx < 16)
10930 {
10931 if (idxGstTmpReg >= 8)
10932 pbCodeBuf[off++] = X86_OP_REX_B;
10933 else if (idxGstTmpReg >= 4)
10934 pbCodeBuf[off++] = X86_OP_REX;
10935 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
10936 pbCodeBuf[off++] = u8Value;
10937 }
10938    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
10939 else if (idxGstTmpReg < 4)
10940 {
10941 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
10942 pbCodeBuf[off++] = u8Value;
10943 }
10944 else
10945 {
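             /* General case: rotate the high byte down into the low byte, patch it
                with a plain mov r8, imm8, then rotate it back.  E.g. for a guest
                register shadowed in host r9 this emits: ror r9, 8; mov r9b, imm8;
                rol r9, 8. */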
10946 /* ror reg64, 8 */
10947 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
10948 pbCodeBuf[off++] = 0xc1;
10949 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
10950 pbCodeBuf[off++] = 8;
10951
10952 /* mov reg8, imm8 */
10953 if (idxGstTmpReg >= 8)
10954 pbCodeBuf[off++] = X86_OP_REX_B;
10955 else if (idxGstTmpReg >= 4)
10956 pbCodeBuf[off++] = X86_OP_REX;
10957 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
10958 pbCodeBuf[off++] = u8Value;
10959
10960 /* rol reg64, 8 */
10961 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
10962 pbCodeBuf[off++] = 0xc1;
10963 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
10964 pbCodeBuf[off++] = 8;
10965 }
10966
10967#elif defined(RT_ARCH_ARM64)
10968 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
10969 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10970 if (iGRegEx < 16)
10971 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
10972 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
10973 else
10974 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
10975 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
10976 iemNativeRegFreeTmp(pReNative, idxImmReg);
10977
10978#else
10979# error "Port me!"
10980#endif
10981
10982 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10983
10984 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
10985
10986 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
10987 return off;
10988}
10989
10990
10991#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
10992 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
10993
10994/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
10995DECL_INLINE_THROW(uint32_t)
10996iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
10997{
10998 Assert(iGRegEx < 20);
10999 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11000
11001 /*
11002     * If it's a constant value (unlikely) we treat this as an
11003 * IEM_MC_STORE_GREG_U8_CONST statement.
11004 */
11005 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11006 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11007 { /* likely */ }
11008 else
11009 {
11010 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11011 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11012 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
11013 }
11014
11015 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
11016 kIemNativeGstRegUse_ForUpdate);
11017 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11018
11019#ifdef RT_ARCH_AMD64
11020 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
11021 if (iGRegEx < 16)
11022 {
11023 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
11024 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11025 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11026 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11027 pbCodeBuf[off++] = X86_OP_REX;
11028 pbCodeBuf[off++] = 0x8a;
11029 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11030 }
11031    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
11032 else if (idxGstTmpReg < 4 && idxVarReg < 4)
11033 {
11034 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
11035 pbCodeBuf[off++] = 0x8a;
11036 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
11037 }
11038 else
11039 {
11040 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
11041
11042 /* ror reg64, 8 */
11043 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11044 pbCodeBuf[off++] = 0xc1;
11045 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11046 pbCodeBuf[off++] = 8;
11047
11048 /* mov reg8, reg8(r/m) */
11049 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11050 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11051 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11052 pbCodeBuf[off++] = X86_OP_REX;
11053 pbCodeBuf[off++] = 0x8a;
11054 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11055
11056 /* rol reg64, 8 */
11057 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11058 pbCodeBuf[off++] = 0xc1;
11059 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11060 pbCodeBuf[off++] = 8;
11061 }
11062
11063#elif defined(RT_ARCH_ARM64)
11064 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
11065 or
11066 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
11067 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11068 if (iGRegEx < 16)
11069 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
11070 else
11071 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
11072
11073#else
11074# error "Port me!"
11075#endif
11076 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11077
11078 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11079
11080 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11081 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11082 return off;
11083}
11084
11085
11086
11087#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
11088 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
11089
11090/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
11091DECL_INLINE_THROW(uint32_t)
11092iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
11093{
11094 Assert(iGReg < 16);
11095 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11096 kIemNativeGstRegUse_ForUpdate);
11097#ifdef RT_ARCH_AMD64
11098 /* mov reg16, imm16 */
11099 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
11100 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11101 if (idxGstTmpReg >= 8)
11102 pbCodeBuf[off++] = X86_OP_REX_B;
11103 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
11104 pbCodeBuf[off++] = RT_BYTE1(uValue);
11105 pbCodeBuf[off++] = RT_BYTE2(uValue);
11106
11107#elif defined(RT_ARCH_ARM64)
11108 /* movk xdst, #uValue, lsl #0 */
11109 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11110 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
11111
11112#else
11113# error "Port me!"
11114#endif
11115
11116 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11117
11118 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11119 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11120 return off;
11121}
11122
11123
11124#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
11125 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
11126
11127/** Emits code for IEM_MC_STORE_GREG_U16. */
11128DECL_INLINE_THROW(uint32_t)
11129iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11130{
11131 Assert(iGReg < 16);
11132 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11133
11134 /*
11135     * If it's a constant value (unlikely) we treat this as an
11136 * IEM_MC_STORE_GREG_U16_CONST statement.
11137 */
11138 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11139 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11140 { /* likely */ }
11141 else
11142 {
11143 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11144 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11145 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
11146 }
11147
11148 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11149 kIemNativeGstRegUse_ForUpdate);
11150
11151#ifdef RT_ARCH_AMD64
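         /* Note: unlike most emitters here this one peeks at the variable's current
            home directly; if it lives in a host register we emit a reg-to-reg move,
            otherwise the low 16 bits are loaded straight from its BP-relative
            stack slot. */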
11152 /* mov reg16, reg16 or [mem16] */
11153 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
11154 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11155 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
11156 {
11157 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
11158 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
11159 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
11160 pbCodeBuf[off++] = 0x8b;
11161 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
11162 }
11163 else
11164 {
11165 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
11166 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
11167 if (idxGstTmpReg >= 8)
11168 pbCodeBuf[off++] = X86_OP_REX_R;
11169 pbCodeBuf[off++] = 0x8b;
11170 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11171 }
11172
11173#elif defined(RT_ARCH_ARM64)
11174 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
11175 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11176 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11177 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
11178 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11179
11180#else
11181# error "Port me!"
11182#endif
11183
11184 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11185
11186 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11187 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11188 return off;
11189}
11190
11191
11192#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
11193 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
11194
11195/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
11196DECL_INLINE_THROW(uint32_t)
11197iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
11198{
11199 Assert(iGReg < 16);
11200 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11201 kIemNativeGstRegUse_ForFullWrite);
11202 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11203 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11204 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11205 return off;
11206}
11207
11208
11209#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
11210 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
11211
11212/** Emits code for IEM_MC_STORE_GREG_U32. */
11213DECL_INLINE_THROW(uint32_t)
11214iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11215{
11216 Assert(iGReg < 16);
11217 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11218
11219 /*
11220     * If it's a constant value (unlikely) we treat this as an
11221 * IEM_MC_STORE_GREG_U32_CONST statement.
11222 */
11223 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11224 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11225 { /* likely */ }
11226 else
11227 {
11228 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11229 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11230 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
11231 }
11232
11233 /*
11234     * For the rest we allocate a guest register for the variable and write
11235     * it to the CPUMCTX structure.
11236 */
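         /* Note: the full 64-bit store below relies on the upper half of the
            variable's host register already being zero (checked in strict builds),
            matching the x86 rule that a 32-bit GPR write clears bits 63:32. */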
11237 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11238 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11239#ifdef VBOX_STRICT
11240 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
11241#endif
11242 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11243 return off;
11244}
11245
11246
11247#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
11248 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
11249
11250/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
11251DECL_INLINE_THROW(uint32_t)
11252iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
11253{
11254 Assert(iGReg < 16);
11255 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11256 kIemNativeGstRegUse_ForFullWrite);
11257 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11258 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11259 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11260 return off;
11261}
11262
11263
11264#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
11265 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
11266
11267/** Emits code for IEM_MC_STORE_GREG_U64. */
11268DECL_INLINE_THROW(uint32_t)
11269iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11270{
11271 Assert(iGReg < 16);
11272 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11273
11274 /*
11275     * If it's a constant value (unlikely) we treat this as an
11276 * IEM_MC_STORE_GREG_U64_CONST statement.
11277 */
11278 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11279 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11280 { /* likely */ }
11281 else
11282 {
11283 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11284 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11285 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
11286 }
11287
11288 /*
11289     * For the rest we allocate a guest register for the variable and write
11290     * it to the CPUMCTX structure.
11291 */
11292 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11293 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11294 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11295 return off;
11296}
11297
11298
11299#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
11300 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
11301
11302/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
11303DECL_INLINE_THROW(uint32_t)
11304iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
11305{
11306 Assert(iGReg < 16);
11307 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11308 kIemNativeGstRegUse_ForUpdate);
11309 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
11310 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11311 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11312 return off;
11313}
11314
11315
11316/*********************************************************************************************************************************
11317* General purpose register manipulation (add, sub). *
11318*********************************************************************************************************************************/
11319
11320#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11321 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11322
11323/** Emits code for IEM_MC_ADD_GREG_U16. */
11324DECL_INLINE_THROW(uint32_t)
11325iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
11326{
11327 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11328 kIemNativeGstRegUse_ForUpdate);
11329
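         /* Only bits 15:0 of the guest register may change here (x86 16-bit writes
            leave the upper bits alone), hence the 16-bit operand size on AMD64 and
            the add-into-temporary plus bfi insert-back on the other path. */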
11330#ifdef RT_ARCH_AMD64
11331 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11332 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11333 if (idxGstTmpReg >= 8)
11334 pbCodeBuf[off++] = X86_OP_REX_B;
11335 if (uAddend == 1)
11336 {
11337 pbCodeBuf[off++] = 0xff; /* inc */
11338 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11339 }
11340 else
11341 {
11342 pbCodeBuf[off++] = 0x81;
11343 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11344 pbCodeBuf[off++] = uAddend;
11345 pbCodeBuf[off++] = 0;
11346 }
11347
11348#else
11349 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11350 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11351
11352    /* add tmp, gstgrp, uAddend */
11353 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
11354
11355    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
11356 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11357
11358 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11359#endif
11360
11361 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11362
11363 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11364
11365 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11366 return off;
11367}
11368
11369
11370#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
11371 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11372
11373#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
11374 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11375
11376/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
11377DECL_INLINE_THROW(uint32_t)
11378iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
11379{
11380 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11381 kIemNativeGstRegUse_ForUpdate);
11382
11383#ifdef RT_ARCH_AMD64
11384 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11385 if (f64Bit)
11386 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11387 else if (idxGstTmpReg >= 8)
11388 pbCodeBuf[off++] = X86_OP_REX_B;
11389 if (uAddend == 1)
11390 {
11391 pbCodeBuf[off++] = 0xff; /* inc */
11392 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11393 }
11394 else if (uAddend < 128)
11395 {
11396 pbCodeBuf[off++] = 0x83; /* add */
11397 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11398 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11399 }
11400 else
11401 {
11402 pbCodeBuf[off++] = 0x81; /* add */
11403 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11404 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11405 pbCodeBuf[off++] = 0;
11406 pbCodeBuf[off++] = 0;
11407 pbCodeBuf[off++] = 0;
11408 }
11409
11410#else
11411    /* add gstgrp, gstgrp, uAddend */
11412 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11413 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
11414
11415#endif
11416
11417 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11418
11419 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11420
11421 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11422 return off;
11423}
11424
11425
11426
11427#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11428 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11429
11430/** Emits code for IEM_MC_SUB_GREG_U16. */
11431DECL_INLINE_THROW(uint32_t)
11432iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
11433{
11434 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11435 kIemNativeGstRegUse_ForUpdate);
11436
11437#ifdef RT_ARCH_AMD64
11438 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11439 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11440 if (idxGstTmpReg >= 8)
11441 pbCodeBuf[off++] = X86_OP_REX_B;
11442 if (uSubtrahend == 1)
11443 {
11444 pbCodeBuf[off++] = 0xff; /* dec */
11445 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11446 }
11447 else
11448 {
11449 pbCodeBuf[off++] = 0x81;
11450 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11451 pbCodeBuf[off++] = uSubtrahend;
11452 pbCodeBuf[off++] = 0;
11453 }
11454
11455#else
11456 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11457 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11458
11459 /* sub tmp, gstgrp, uSubtrahend */
11460 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
11461
11462    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into idxGstTmpReg. */
11463 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11464
11465 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11466#endif
11467
11468 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11469
11470 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11471
11472 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11473 return off;
11474}
11475
11476
11477#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
11478 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11479
11480#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
11481 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11482
11483/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
11484DECL_INLINE_THROW(uint32_t)
11485iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
11486{
11487 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11488 kIemNativeGstRegUse_ForUpdate);
11489
11490#ifdef RT_ARCH_AMD64
11491 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11492 if (f64Bit)
11493 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11494 else if (idxGstTmpReg >= 8)
11495 pbCodeBuf[off++] = X86_OP_REX_B;
11496 if (uSubtrahend == 1)
11497 {
11498 pbCodeBuf[off++] = 0xff; /* dec */
11499 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11500 }
11501 else if (uSubtrahend < 128)
11502 {
11503 pbCodeBuf[off++] = 0x83; /* sub */
11504 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11505 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11506 }
11507 else
11508 {
11509 pbCodeBuf[off++] = 0x81; /* sub */
11510 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11511 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11512 pbCodeBuf[off++] = 0;
11513 pbCodeBuf[off++] = 0;
11514 pbCodeBuf[off++] = 0;
11515 }
11516
11517#else
11518    /* sub gstgrp, gstgrp, uSubtrahend */
11519 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11520 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
11521
11522#endif
11523
11524 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11525
11526 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11527
11528 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11529 return off;
11530}
11531
11532
11533/*********************************************************************************************************************************
11534* Local variable manipulation (add, sub, and, or). *
11535*********************************************************************************************************************************/
11536
11537#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
11538 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11539
11540#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
11541 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11542
11543#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
11544 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11545
11546#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
11547 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11548
11549/** Emits code for AND'ing a local and a constant value. */
11550DECL_INLINE_THROW(uint32_t)
11551iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11552{
11553#ifdef VBOX_STRICT
11554 switch (cbMask)
11555 {
11556 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11557 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11558 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11559 case sizeof(uint64_t): break;
11560 default: AssertFailedBreak();
11561 }
11562#endif
11563
11564 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11565 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11566
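         /* The 32-bit AND form covers the 8/16/32-bit cases; the strict-build
            checks above ensure the mask has no bits set beyond the variable width. */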
11567 if (cbMask <= sizeof(uint32_t))
11568 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
11569 else
11570 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
11571
11572 iemNativeVarRegisterRelease(pReNative, idxVar);
11573 return off;
11574}
11575
11576
11577#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
11578 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11579
11580#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
11581 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11582
11583#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
11584 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11585
11586#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
11587 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11588
11589/** Emits code for OR'ing a local and a constant value. */
11590DECL_INLINE_THROW(uint32_t)
11591iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11592{
11593#ifdef VBOX_STRICT
11594 switch (cbMask)
11595 {
11596 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11597 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11598 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11599 case sizeof(uint64_t): break;
11600 default: AssertFailedBreak();
11601 }
11602#endif
11603
11604 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11605 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11606
11607 if (cbMask <= sizeof(uint32_t))
11608 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
11609 else
11610 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
11611
11612 iemNativeVarRegisterRelease(pReNative, idxVar);
11613 return off;
11614}
11615
11616
11617#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
11618 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
11619
11620#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
11621 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
11622
11623#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
11624 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
11625
11626/** Emits code for reversing the byte order in a local value. */
11627DECL_INLINE_THROW(uint32_t)
11628iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
11629{
11630 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11631 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
11632
11633 switch (cbLocal)
11634 {
11635 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
11636 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
11637 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
11638 default: AssertFailedBreak();
11639 }
11640
11641 iemNativeVarRegisterRelease(pReNative, idxVar);
11642 return off;
11643}
11644
11645
11646
11647/*********************************************************************************************************************************
11648* EFLAGS *
11649*********************************************************************************************************************************/
11650
11651#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
11652# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
11653#else
11654# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
11655 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
11656
11657DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
11658{
11659 if (fEflOutput)
11660 {
11661 PVMCPUCC const pVCpu = pReNative->pVCpu;
11662# ifndef IEMLIVENESS_EXTENDED_LAYOUT
11663 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
11664 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
11665 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
11666# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11667 if (fEflOutput & (a_fEfl)) \
11668 { \
11669 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
11670 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11671 else \
11672 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11673 } else do { } while (0)
11674# else
11675 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
11676 IEMLIVENESSBIT const LivenessClobbered =
11677 {
11678 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11679 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11680 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11681 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11682 };
11683 IEMLIVENESSBIT const LivenessDelayable =
11684 {
11685 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11686 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11687 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11688 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11689 };
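             /* Reading of the bitmap math above (interpretation, not authoritative):
                'clobbered' flags are overwritten later without being read, so their
                calculation is skippable; 'delayable' ones are only needed if a
                potential exception or call actually materializes. */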
11690# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11691 if (fEflOutput & (a_fEfl)) \
11692 { \
11693 if (LivenessClobbered.a_fLivenessMember) \
11694 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11695 else if (LivenessDelayable.a_fLivenessMember) \
11696 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
11697 else \
11698 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11699 } else do { } while (0)
11700# endif
11701 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
11702 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
11703 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
11704 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
11705 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
11706 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
11707 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
11708# undef CHECK_FLAG_AND_UPDATE_STATS
11709 }
11710 RT_NOREF(fEflInput);
11711}
11712#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
11713
11714#undef IEM_MC_FETCH_EFLAGS /* should not be used */
11715#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11716 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
11717
11718/** Handles IEM_MC_FETCH_EFLAGS_EX. */
11719DECL_INLINE_THROW(uint32_t)
11720iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
11721 uint32_t fEflInput, uint32_t fEflOutput)
11722{
11723 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
11724 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11725 RT_NOREF(fEflInput, fEflOutput);
11726
11727#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
11728# ifdef VBOX_STRICT
11729 if ( pReNative->idxCurCall != 0
11730        && (fEflInput != 0 || fEflOutput != 0) /* for the NOT instruction these are both zero for now. */)
11731 {
11732 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
11733 uint32_t const fBoth = fEflInput | fEflOutput;
11734# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
11735 AssertMsg( !(fBoth & (a_fElfConst)) \
11736 || (!(fEflInput & (a_fElfConst)) \
11737 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11738 : !(fEflOutput & (a_fElfConst)) \
11739 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11740 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
11741 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
11742 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
11743 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
11744 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
11745 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
11746 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
11747 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
11748 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
11749# undef ASSERT_ONE_EFL
11750 }
11751# endif
11752#endif
11753
11754    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
11755 * the existing shadow copy. */
11756 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
11757 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11758 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11759 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11760 return off;
11761}
11762
11763
11764
11765/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
11766 * start using it with custom native code emission (inlining assembly
11767 * instruction helpers). */
11768#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
11769#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11770 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11771 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
11772
11773/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
11774DECL_INLINE_THROW(uint32_t)
11775iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
11776{
11777 RT_NOREF(fEflOutput);
11778 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
11779 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11780
11781#ifdef VBOX_STRICT
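         /* Strict builds sanity check the value being committed: the must-be-one
            bit (X86_EFL_RA1_MASK) has to be set and the reserved-zero bits clear,
            with a breakpoint (0x2001/0x2002) emitted as a cheap runtime trap. */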
11782 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
11783 uint32_t offFixup = off;
11784 off = iemNativeEmitJnzToFixed(pReNative, off, off);
11785 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
11786 iemNativeFixupFixedJump(pReNative, offFixup, off);
11787
11788 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
11789 offFixup = off;
11790 off = iemNativeEmitJzToFixed(pReNative, off, off);
11791 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
11792 iemNativeFixupFixedJump(pReNative, offFixup, off);
11793
11794    /** @todo validate that only bits in the fEflOutput mask changed. */
11795#endif
11796
11797 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11798 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
11799 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11800 return off;
11801}
11802
11803
11804
11805/*********************************************************************************************************************************
11806* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
11807*********************************************************************************************************************************/
11808
11809#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
11810 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
11811
11812#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
11813 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
11814
11815#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
11816 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
11817
11818
11819/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
11820 * IEM_MC_FETCH_SREG_ZX_U64. */
11821DECL_INLINE_THROW(uint32_t)
11822iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
11823{
11824 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
11825 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
11826 Assert(iSReg < X86_SREG_COUNT);
11827
11828 /*
11829     * For now, we will not create a shadow copy of a selector. The rationale
11830     * is that since we do not recompile the popping and loading of segment
11831     * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
11832     * pushing and moving to registers, there is only a small chance that the
11833     * shadow copy will be accessed again before the register is reloaded. One
11834     * scenario would be nested calls in 16-bit code, but I doubt it's worth
11835     * the extra register pressure atm.
11836     *
11837     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
11838     * and iemNativeVarRegisterAcquire for a load scenario. We've only got the
11839     * store scenario covered at present (r160730).
11840 */
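         /* All three fetch widths share this path: the 16-bit load from CPUMCTX
            below zero-extends into the host register, and cbVar is only used for
            the size assertion. */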
11841 iemNativeVarSetKindToStack(pReNative, idxDstVar);
11842 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
11843 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
11844 iemNativeVarRegisterRelease(pReNative, idxDstVar);
11845 return off;
11846}
11847
11848
11849
11850/*********************************************************************************************************************************
11851* Register references. *
11852*********************************************************************************************************************************/
11853
11854#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
11855 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
11856
11857#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
11858 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
11859
11860/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
11861DECL_INLINE_THROW(uint32_t)
11862iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
11863{
11864 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
11865 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11866 Assert(iGRegEx < 20);
11867
11868 if (iGRegEx < 16)
11869 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11870 else
11871 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
11872
11873 /* If we've delayed writing back the register value, flush it now. */
11874 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11875
11876 /* If it's not a const reference we need to flush the shadow copy of the register now. */
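         /* Rationale: a non-const reference lets the caller modify the register
            through the pointer, which would leave any host shadow copy stale. */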
11877 if (!fConst)
11878 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
11879
11880 return off;
11881}
11882
11883#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
11884 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
11885
11886#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
11887 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
11888
11889#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
11890 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
11891
11892#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
11893 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
11894
11895#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
11896 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
11897
11898#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
11899 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
11900
11901#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
11902 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
11903
11904#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
11905 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
11906
11907#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
11908 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
11909
11910#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
11911 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
11912
11913/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
11914DECL_INLINE_THROW(uint32_t)
11915iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
11916{
11917 Assert(iGReg < 16);
11918 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
11919 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11920
11921 /* If we've delayed writing back the register value, flush it now. */
11922 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
11923
11924 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11925 if (!fConst)
11926 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
11927
11928 return off;
11929}
11930
11931
11932#undef IEM_MC_REF_EFLAGS /* should not be used. */
11933#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
11934 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11935 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
11936
11937/** Handles IEM_MC_REF_EFLAGS. */
11938DECL_INLINE_THROW(uint32_t)
11939iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
11940{
11941 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
11942 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11943
11944 /* If we've delayed writing back the register value, flush it now. */
11945 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
11946
11947 /* If there is a shadow copy of guest EFLAGS, flush it now. */
11948 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
11949
11950 return off;
11951}
11952
11953
11954/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
11955 * different code from the threaded recompiler, maybe it would be helpful. For now
11956 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
11957#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
11958
11959
11960#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
11961 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
11962
11963#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
11964 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
11965
11966#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
11967 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
11968
11969/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
11970DECL_INLINE_THROW(uint32_t)
11971iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
11972{
11973 Assert(iXReg < 16);
11974 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
11975 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11976
11977 /* If we've delayed writing back the register value, flush it now. */
11978 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
11979
11980#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
11981 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11982 if (!fConst)
11983 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
11984#else
11985 RT_NOREF(fConst);
11986#endif
11987
11988 return off;
11989}
11990
11991
11992#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
11993 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
11994
11995/** Handles IEM_MC_REF_MXCSR. */
11996DECL_INLINE_THROW(uint32_t)
11997iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
11998{
11999 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
12000 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12001
12002 /* If we've delayed writing back the register value, flush it now. */
12003 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
12004
12005 /* If there is a shadow copy of guest MXCSR, flush it now. */
12006 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
12007
12008 return off;
12009}
12010
12011
12012
12013/*********************************************************************************************************************************
12014* Effective Address Calculation *
12015*********************************************************************************************************************************/
12016#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
12017 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
12018
12019/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
12020 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
12021DECL_INLINE_THROW(uint32_t)
12022iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12023 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
12024{
12025 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12026
12027 /*
12028 * Handle the disp16 form with no registers first.
12029 *
12030 * Convert to an immediate value, as that'll delay the register allocation
12031 * and assignment till the memory access / call / whatever and we can use
12032 * a more appropriate register (or none at all).
12033 */
12034 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
12035 {
12036 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
12037 return off;
12038 }
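         /* Illustration: 'mov ax, [1234h]' has mod=0 and r/m=6, so it takes the
            early return above and the effective address becomes a plain constant. */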
12039
12040    /* Determine the displacement. */
12041 uint16_t u16EffAddr;
12042 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12043 {
12044 case 0: u16EffAddr = 0; break;
12045 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
12046 case 2: u16EffAddr = u16Disp; break;
12047 default: AssertFailedStmt(u16EffAddr = 0);
12048 }
12049
12050 /* Determine the registers involved. */
12051 uint8_t idxGstRegBase;
12052 uint8_t idxGstRegIndex;
12053 switch (bRm & X86_MODRM_RM_MASK)
12054 {
12055 case 0:
12056 idxGstRegBase = X86_GREG_xBX;
12057 idxGstRegIndex = X86_GREG_xSI;
12058 break;
12059 case 1:
12060 idxGstRegBase = X86_GREG_xBX;
12061 idxGstRegIndex = X86_GREG_xDI;
12062 break;
12063 case 2:
12064 idxGstRegBase = X86_GREG_xBP;
12065 idxGstRegIndex = X86_GREG_xSI;
12066 break;
12067 case 3:
12068 idxGstRegBase = X86_GREG_xBP;
12069 idxGstRegIndex = X86_GREG_xDI;
12070 break;
12071 case 4:
12072 idxGstRegBase = X86_GREG_xSI;
12073 idxGstRegIndex = UINT8_MAX;
12074 break;
12075 case 5:
12076 idxGstRegBase = X86_GREG_xDI;
12077 idxGstRegIndex = UINT8_MAX;
12078 break;
12079 case 6:
12080 idxGstRegBase = X86_GREG_xBP;
12081 idxGstRegIndex = UINT8_MAX;
12082 break;
12083#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
12084 default:
12085#endif
12086 case 7:
12087 idxGstRegBase = X86_GREG_xBX;
12088 idxGstRegIndex = UINT8_MAX;
12089 break;
12090 }
12091
12092 /*
12093 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
12094 */
12095 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12096 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12097 kIemNativeGstRegUse_ReadOnly);
12098 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
12099 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12100 kIemNativeGstRegUse_ReadOnly)
12101 : UINT8_MAX;
12102#ifdef RT_ARCH_AMD64
12103 if (idxRegIndex == UINT8_MAX)
12104 {
12105 if (u16EffAddr == 0)
12106 {
12107            /* movzx ret, base */
12108 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
12109 }
12110 else
12111 {
12112 /* lea ret32, [base64 + disp32] */
12113 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12114 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12115 if (idxRegRet >= 8 || idxRegBase >= 8)
12116 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12117 pbCodeBuf[off++] = 0x8d;
12118 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12119 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
12120 else
12121 {
12122 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
12123 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12124 }
12125 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12126 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12127 pbCodeBuf[off++] = 0;
12128 pbCodeBuf[off++] = 0;
12129 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12130
12131 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12132 }
12133 }
12134 else
12135 {
12136 /* lea ret32, [index64 + base64 (+ disp32)] */
12137 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12138 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12139 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12140 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12141 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12142 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12143 pbCodeBuf[off++] = 0x8d;
12144 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
12145 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12146 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
12147 if (bMod == X86_MOD_MEM4)
12148 {
12149 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12150 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12151 pbCodeBuf[off++] = 0;
12152 pbCodeBuf[off++] = 0;
12153 }
12154 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12155 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12156 }
12157
12158#elif defined(RT_ARCH_ARM64)
12159 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
12160 if (u16EffAddr == 0)
12161 {
12162 if (idxRegIndex == UINT8_MAX)
12163 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
12164 else
12165 {
12166 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
12167 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12168 }
12169 }
12170 else
12171 {
12172 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
12173 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
12174 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
12175 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12176 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
12177 else
12178 {
12179 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
12180 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12181 }
12182 if (idxRegIndex != UINT8_MAX)
12183 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
12184 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12185 }
12186
12187#else
12188# error "port me"
12189#endif
12190
12191 if (idxRegIndex != UINT8_MAX)
12192 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12193 iemNativeRegFreeTmp(pReNative, idxRegBase);
12194 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12195 return off;
12196}
12197
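
/*
 * Purely illustrative sketch (not part of the recompiler and excluded from the
 * build): a standalone reference model of the 16-bit effective address math
 * that the emitter above reproduces with native instructions.  The function
 * name and the explicit register arguments are made up for the example.
 */
#if 0
static uint16_t iemExampleCalcEffAddr16(uint8_t bRm, uint16_t u16Disp,
                                        uint16_t uBx, uint16_t uBp, uint16_t uSi, uint16_t uDi)
{
    uint16_t uEffAddr;
    switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    {
        case 0:  uEffAddr = (bRm & X86_MODRM_RM_MASK) == 6 ? u16Disp : 0; break; /* mod=0, rm=6: disp16 only */
        case 1:  uEffAddr = (uint16_t)(int16_t)(int8_t)u16Disp; break;           /* sign-extended disp8 */
        default: uEffAddr = u16Disp; break;                                       /* disp16 */
    }
    switch (bRm & X86_MODRM_RM_MASK)
    {
        case 0: uEffAddr += uBx + uSi; break;
        case 1: uEffAddr += uBx + uDi; break;
        case 2: uEffAddr += uBp + uSi; break;
        case 3: uEffAddr += uBp + uDi; break;
        case 4: uEffAddr += uSi; break;
        case 5: uEffAddr += uDi; break;
        case 6: if (bRm & X86_MODRM_MOD_MASK) uEffAddr += uBp; break;            /* only for mod=1/2 */
        case 7: uEffAddr += uBx; break;
    }
    return uEffAddr; /* Wraps at 16 bits, matching the movzx/UXTH in the emitted code. */
}
#endif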
12198
12199#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
12200 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
12201
12202/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
12203 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
12204DECL_INLINE_THROW(uint32_t)
12205iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12206 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
12207{
12208 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12209
12210 /*
12211 * Handle the disp32 form with no registers first.
12212 *
12213 * Convert to an immediate value, as that'll delay the register allocation
12214 * and assignment till the memory access / call / whatever and we can use
12215 * a more appropriate register (or none at all).
12216 */
12217 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12218 {
12219 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
12220 return off;
12221 }
12222
12223 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
12224 uint32_t u32EffAddr = 0;
12225 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12226 {
12227 case 0: break;
12228 case 1: u32EffAddr = (int8_t)u32Disp; break;
12229 case 2: u32EffAddr = u32Disp; break;
12230 default: AssertFailed();
12231 }
12232
12233 /* Get the register (or SIB) value. */
12234 uint8_t idxGstRegBase = UINT8_MAX;
12235 uint8_t idxGstRegIndex = UINT8_MAX;
12236 uint8_t cShiftIndex = 0;
12237 switch (bRm & X86_MODRM_RM_MASK)
12238 {
12239 case 0: idxGstRegBase = X86_GREG_xAX; break;
12240 case 1: idxGstRegBase = X86_GREG_xCX; break;
12241 case 2: idxGstRegBase = X86_GREG_xDX; break;
12242 case 3: idxGstRegBase = X86_GREG_xBX; break;
12243 case 4: /* SIB */
12244 {
12245            /* index with scaling. */
12246 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12247 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12248 {
12249 case 0: idxGstRegIndex = X86_GREG_xAX; break;
12250 case 1: idxGstRegIndex = X86_GREG_xCX; break;
12251 case 2: idxGstRegIndex = X86_GREG_xDX; break;
12252 case 3: idxGstRegIndex = X86_GREG_xBX; break;
12253 case 4: cShiftIndex = 0; /*no index*/ break;
12254 case 5: idxGstRegIndex = X86_GREG_xBP; break;
12255 case 6: idxGstRegIndex = X86_GREG_xSI; break;
12256 case 7: idxGstRegIndex = X86_GREG_xDI; break;
12257 }
12258
12259 /* base */
12260 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
12261 {
12262 case 0: idxGstRegBase = X86_GREG_xAX; break;
12263 case 1: idxGstRegBase = X86_GREG_xCX; break;
12264 case 2: idxGstRegBase = X86_GREG_xDX; break;
12265 case 3: idxGstRegBase = X86_GREG_xBX; break;
12266 case 4:
12267 idxGstRegBase = X86_GREG_xSP;
12268 u32EffAddr += uSibAndRspOffset >> 8;
12269 break;
12270 case 5:
12271 if ((bRm & X86_MODRM_MOD_MASK) != 0)
12272 idxGstRegBase = X86_GREG_xBP;
12273 else
12274 {
12275 Assert(u32EffAddr == 0);
12276 u32EffAddr = u32Disp;
12277 }
12278 break;
12279 case 6: idxGstRegBase = X86_GREG_xSI; break;
12280 case 7: idxGstRegBase = X86_GREG_xDI; break;
12281 }
12282 break;
12283 }
12284 case 5: idxGstRegBase = X86_GREG_xBP; break;
12285 case 6: idxGstRegBase = X86_GREG_xSI; break;
12286 case 7: idxGstRegBase = X86_GREG_xDI; break;
12287 }
12288
12289 /*
12290 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12291 * the start of the function.
12292 */
12293 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12294 {
12295 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
12296 return off;
12297 }
12298
12299 /*
12300 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12301 */
12302 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12303 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12304 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12305 kIemNativeGstRegUse_ReadOnly);
12306 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12307 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12308 kIemNativeGstRegUse_ReadOnly);
12309
12310 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12311 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12312 {
12313 idxRegBase = idxRegIndex;
12314 idxRegIndex = UINT8_MAX;
12315 }
12316
12317#ifdef RT_ARCH_AMD64
12318 if (idxRegIndex == UINT8_MAX)
12319 {
12320 if (u32EffAddr == 0)
12321 {
12322 /* mov ret, base */
12323 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12324 }
12325 else
12326 {
12327 /* lea ret32, [base64 + disp32] */
12328 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12329 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12330 if (idxRegRet >= 8 || idxRegBase >= 8)
12331 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12332 pbCodeBuf[off++] = 0x8d;
12333 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12334 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12335 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12336 else
12337 {
12338 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12339 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12340 }
12341 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12342 if (bMod == X86_MOD_MEM4)
12343 {
12344 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12345 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12346 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12347 }
12348 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12349 }
12350 }
12351 else
12352 {
12353 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12354 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12355 if (idxRegBase == UINT8_MAX)
12356 {
12357 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
12358 if (idxRegRet >= 8 || idxRegIndex >= 8)
12359 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12360 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12361 pbCodeBuf[off++] = 0x8d;
12362 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12363 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12364 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12365 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12366 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12367 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12368 }
12369 else
12370 {
12371 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12372 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12373 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12374 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12375 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12376 pbCodeBuf[off++] = 0x8d;
12377 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12378 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12379 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12380 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12381 if (bMod != X86_MOD_MEM0)
12382 {
12383 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12384 if (bMod == X86_MOD_MEM4)
12385 {
12386 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12387 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12388 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12389 }
12390 }
12391 }
12392 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12393 }
12394
12395#elif defined(RT_ARCH_ARM64)
12396 if (u32EffAddr == 0)
12397 {
12398 if (idxRegIndex == UINT8_MAX)
12399 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12400 else if (idxRegBase == UINT8_MAX)
12401 {
12402 if (cShiftIndex == 0)
12403 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
12404 else
12405 {
12406 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12407 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
12408 }
12409 }
12410 else
12411 {
12412 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12413 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12414 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12415 }
12416 }
12417 else
12418 {
12419 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
12420 {
12421 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12422 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
12423 }
12424 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
12425 {
12426 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12427 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12428 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
12429 }
12430 else
12431 {
12432 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
12433 if (idxRegBase != UINT8_MAX)
12434 {
12435 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12436 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12437 }
12438 }
12439 if (idxRegIndex != UINT8_MAX)
12440 {
12441 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12442 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12443 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12444 }
12445 }
12446
12447#else
12448# error "port me"
12449#endif
12450
12451 if (idxRegIndex != UINT8_MAX)
12452 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12453 if (idxRegBase != UINT8_MAX)
12454 iemNativeRegFreeTmp(pReNative, idxRegBase);
12455 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12456 return off;
12457}
12458
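
/*
 * Worked example for the 32-bit variant above (all values made up):
 * bRm=0x44 (mod=1, rm=4 -> SIB), SIB byte 0x88 in the low bits of
 * uSibAndRspOffset (scale=2, index=001b=ECX, base=000b=EAX) and u32Disp=0x10
 * (the sign-extended disp8) make the function allocate read-only host copies
 * of EAX and ECX and emit code computing
 * idxRegRet = (uint32_t)(0x10 + EAX + (ECX << 2)).
 */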
12459
12460#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12461 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12462 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12463
12464#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12465 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12466 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12467
12468#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12469 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12470 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
12471
12472/**
12473 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
12474 *
12475 * @returns New off.
12476 * @param   pReNative       The native recompiler state.
12477 * @param   off             The current offset into the code buffer.
12478 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
12479 * bit 4 to REX.X. The two bits are part of the
12480 * REG sub-field, which isn't needed in this
12481 * function.
12482 * @param uSibAndRspOffset Two parts:
12483 * - The first 8 bits make up the SIB byte.
12484 * - The next 8 bits are the fixed RSP/ESP offset
12485 * in case of a pop [xSP].
12486 * @param u32Disp The displacement byte/word/dword, if any.
12487 * @param cbInstr The size of the fully decoded instruction. Used
12488 * for RIP relative addressing.
12489 * @param idxVarRet The result variable number.
12490 * @param f64Bit Whether to use a 64-bit or 32-bit address size
12491 * when calculating the address.
12492 *
12493 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
12494 */
12495DECL_INLINE_THROW(uint32_t)
12496iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
12497 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
12498{
12499 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12500
12501 /*
12502 * Special case the rip + disp32 form first.
12503 */
12504 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12505 {
12506#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12507        /* Take the current PC offset into account for the displacement; there is no need to flush
12508         * here as the PC is only read and no branching or helper calls are involved. */
12509 u32Disp += pReNative->Core.offPc;
12510#endif
12511
12512 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12513 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
12514 kIemNativeGstRegUse_ReadOnly);
12515#ifdef RT_ARCH_AMD64
12516 if (f64Bit)
12517 {
12518 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
12519 if ((int32_t)offFinalDisp == offFinalDisp)
12520 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
12521 else
12522 {
12523 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
12524 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
12525 }
12526 }
12527 else
12528 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
12529
12530#elif defined(RT_ARCH_ARM64)
12531 if (f64Bit)
12532 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12533 (int64_t)(int32_t)u32Disp + cbInstr);
12534 else
12535 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12536 (int32_t)u32Disp + cbInstr);
12537
12538#else
12539# error "Port me!"
12540#endif
12541 iemNativeRegFreeTmp(pReNative, idxRegPc);
12542 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12543 return off;
12544 }
12545
12546 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
12547 int64_t i64EffAddr = 0;
12548 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12549 {
12550 case 0: break;
12551 case 1: i64EffAddr = (int8_t)u32Disp; break;
12552 case 2: i64EffAddr = (int32_t)u32Disp; break;
12553 default: AssertFailed();
12554 }
12555
12556 /* Get the register (or SIB) value. */
12557 uint8_t idxGstRegBase = UINT8_MAX;
12558 uint8_t idxGstRegIndex = UINT8_MAX;
12559 uint8_t cShiftIndex = 0;
12560 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
12561 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
12562 else /* SIB: */
12563 {
12564        /* index with scaling. */
12565 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12566 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12567 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
12568 if (idxGstRegIndex == 4)
12569 {
12570 /* no index */
12571 cShiftIndex = 0;
12572 idxGstRegIndex = UINT8_MAX;
12573 }
12574
12575 /* base */
12576 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
12577 if (idxGstRegBase == 4)
12578 {
12579 /* pop [rsp] hack */
12580 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
12581 }
12582 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
12583 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
12584 {
12585 /* mod=0 and base=5 -> disp32, no base reg. */
12586 Assert(i64EffAddr == 0);
12587 i64EffAddr = (int32_t)u32Disp;
12588 idxGstRegBase = UINT8_MAX;
12589 }
12590 }
12591
12592 /*
12593 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12594 * the start of the function.
12595 */
12596 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12597 {
12598 if (f64Bit)
12599 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
12600 else
12601 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
12602 return off;
12603 }
12604
12605 /*
12606 * Now emit code that calculates:
12607 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12608 * or if !f64Bit:
12609 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12610 */
12611 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12612 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12613 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12614 kIemNativeGstRegUse_ReadOnly);
12615 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12616 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12617 kIemNativeGstRegUse_ReadOnly);
12618
12619 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12620 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12621 {
12622 idxRegBase = idxRegIndex;
12623 idxRegIndex = UINT8_MAX;
12624 }
12625
12626#ifdef RT_ARCH_AMD64
12627 uint8_t bFinalAdj;
12628 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
12629 bFinalAdj = 0; /* likely */
12630 else
12631 {
12632 /* pop [rsp] with a problematic disp32 value. Split out the
12633 RSP offset and add it separately afterwards (bFinalAdj). */
12634 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
12635 Assert(idxGstRegBase == X86_GREG_xSP);
12636 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
12637 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
12638 Assert(bFinalAdj != 0);
12639 i64EffAddr -= bFinalAdj;
12640 Assert((int32_t)i64EffAddr == i64EffAddr);
12641 }
12642 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
12643//pReNative->pInstrBuf[off++] = 0xcc;
12644
12645 if (idxRegIndex == UINT8_MAX)
12646 {
12647 if (u32EffAddr == 0)
12648 {
12649 /* mov ret, base */
12650 if (f64Bit)
12651 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
12652 else
12653 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12654 }
12655 else
12656 {
12657 /* lea ret, [base + disp32] */
12658 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12659 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12660 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
12661 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12662 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12663 | (f64Bit ? X86_OP_REX_W : 0);
12664 pbCodeBuf[off++] = 0x8d;
12665 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12666 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12667 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12668 else
12669 {
12670 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12671 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12672 }
12673 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12674 if (bMod == X86_MOD_MEM4)
12675 {
12676 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12677 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12678 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12679 }
12680 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12681 }
12682 }
12683 else
12684 {
12685 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12686 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12687 if (idxRegBase == UINT8_MAX)
12688 {
12689 /* lea ret, [(index64 << cShiftIndex) + disp32] */
12690 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
12691 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12692 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12693 | (f64Bit ? X86_OP_REX_W : 0);
12694 pbCodeBuf[off++] = 0x8d;
12695 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12696 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12697 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12698 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12699 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12700 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12701 }
12702 else
12703 {
12704 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12705 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12706 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12707 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12708 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12709 | (f64Bit ? X86_OP_REX_W : 0);
12710 pbCodeBuf[off++] = 0x8d;
12711 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12712 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12713 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12714 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12715 if (bMod != X86_MOD_MEM0)
12716 {
12717 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12718 if (bMod == X86_MOD_MEM4)
12719 {
12720 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12721 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12722 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12723 }
12724 }
12725 }
12726 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12727 }
12728
12729 if (!bFinalAdj)
12730 { /* likely */ }
12731 else
12732 {
12733 Assert(f64Bit);
12734 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
12735 }
12736
12737#elif defined(RT_ARCH_ARM64)
12738 if (i64EffAddr == 0)
12739 {
12740 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12741 if (idxRegIndex == UINT8_MAX)
12742 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
12743 else if (idxRegBase != UINT8_MAX)
12744 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12745 f64Bit, false /*fSetFlags*/, cShiftIndex);
12746 else
12747 {
12748 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
12749 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
12750 }
12751 }
12752 else
12753 {
12754 if (f64Bit)
12755 { /* likely */ }
12756 else
12757 i64EffAddr = (int32_t)i64EffAddr;
12758
12759 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
12760 {
12761 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12762 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
12763 }
12764 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
12765 {
12766 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12767 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
12768 }
12769 else
12770 {
12771 if (f64Bit)
12772 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
12773 else
12774 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
12775 if (idxRegBase != UINT8_MAX)
12776 {
12777 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12778 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
12779 }
12780 }
12781 if (idxRegIndex != UINT8_MAX)
12782 {
12783 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12784 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12785 f64Bit, false /*fSetFlags*/, cShiftIndex);
12786 }
12787 }
12788
12789#else
12790# error "port me"
12791#endif
12792
12793 if (idxRegIndex != UINT8_MAX)
12794 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12795 if (idxRegBase != UINT8_MAX)
12796 iemNativeRegFreeTmp(pReNative, idxRegBase);
12797 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12798 return off;
12799}
12800
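
/*
 * Worked example for the 64-bit variant above (all values made up, f64Bit assumed):
 * ModRM 0x84 (mod=2, rm=4 -> SIB) with REX.B and REX.X set arrives as
 * bRmEx = 0x84 | 0x08 | 0x10 = 0x9c.  With SIB byte 0xd1 in uSibAndRspOffset
 * (scale=3, index=010b, base=001b) the REX bits promote the index to r10 and
 * the base to r9, so the emitted code computes
 * idxRegRet = (uint64_t)((int32_t)u32Disp + r9 + (r10 << 3)).
 */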
12801
12802/*********************************************************************************************************************************
12803* TLB Lookup. *
12804*********************************************************************************************************************************/
12805
12806/**
12807 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
12808 */
12809DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
12810{
12811 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
12812 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
12813 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
12814 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
12815
12816 /* Do the lookup manually. */
12817 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
12818 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
12819 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
12820 if (RT_LIKELY(pTlbe->uTag == uTag))
12821 {
12822 /*
12823 * Check TLB page table level access flags.
12824 */
12825 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
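        /* Note: IEM_GET_CPL() returns 0..3, so CPL + 1 is 1..4 and bit 2 (IEMTLBE_F_PT_NO_USER)
           is only set for CPL 3, i.e. only ring-3 accesses require the page to be user accessible. */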
12826 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
12827 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
12828 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
12829 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
12830 | IEMTLBE_F_PG_UNASSIGNED
12831 | IEMTLBE_F_PT_NO_ACCESSED
12832 | fNoWriteNoDirty | fNoUser);
12833 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
12834 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
12835 {
12836 /*
12837 * Return the address.
12838 */
12839 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
12840 if ((uintptr_t)pbAddr == uResult)
12841 return;
12842 RT_NOREF(cbMem);
12843 AssertFailed();
12844 }
12845 else
12846 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
12847 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
12848 }
12849 else
12850 AssertFailed();
12851 RT_BREAKPOINT();
12852}
12853
12854/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
12855
12856
12857/*********************************************************************************************************************************
12858* Memory fetches and stores common *
12859*********************************************************************************************************************************/
12860
12861typedef enum IEMNATIVEMITMEMOP
12862{
12863 kIemNativeEmitMemOp_Store = 0,
12864 kIemNativeEmitMemOp_Fetch,
12865 kIemNativeEmitMemOp_Fetch_Zx_U16,
12866 kIemNativeEmitMemOp_Fetch_Zx_U32,
12867 kIemNativeEmitMemOp_Fetch_Zx_U64,
12868 kIemNativeEmitMemOp_Fetch_Sx_U16,
12869 kIemNativeEmitMemOp_Fetch_Sx_U32,
12870 kIemNativeEmitMemOp_Fetch_Sx_U64
12871} IEMNATIVEMITMEMOP;
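
/* Example mapping (see the IEM_MC_* wrapper macros further down): IEM_MC_FETCH_MEM_U32_SX_U64 uses
   kIemNativeEmitMemOp_Fetch_Sx_U64 with cbMem = sizeof(uint32_t), while the plain stores use
   kIemNativeEmitMemOp_Store with cbMem equal to the operand size. */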
12872
12873/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
12874 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
12875 * (with iSegReg = UINT8_MAX). */
12876DECL_INLINE_THROW(uint32_t)
12877iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
12878 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
12879 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
12880{
12881 /*
12882 * Assert sanity.
12883 */
12884 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12885 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12886 Assert( enmOp != kIemNativeEmitMemOp_Store
12887 || pVarValue->enmKind == kIemNativeVarKind_Immediate
12888 || pVarValue->enmKind == kIemNativeVarKind_Stack);
12889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12890 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
12891 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
12892 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
12893 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12894 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12895 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
12896 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12897#ifdef VBOX_STRICT
12898 if (iSegReg == UINT8_MAX)
12899 {
12900 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12901 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12902 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12903 switch (cbMem)
12904 {
12905 case 1:
12906 Assert( pfnFunction
12907 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
12908 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12909 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12910 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12911 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12912 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
12913 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
12914 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
12915 : UINT64_C(0xc000b000a0009000) ));
12916 break;
12917 case 2:
12918 Assert( pfnFunction
12919 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
12920 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12921 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12922 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12923 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
12924 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
12925 : UINT64_C(0xc000b000a0009000) ));
12926 break;
12927 case 4:
12928 Assert( pfnFunction
12929 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
12930 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
12931 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
12932 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
12933 : UINT64_C(0xc000b000a0009000) ));
12934 break;
12935 case 8:
12936 Assert( pfnFunction
12937 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
12938 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
12939 : UINT64_C(0xc000b000a0009000) ));
12940 break;
12941 }
12942 }
12943 else
12944 {
12945 Assert(iSegReg < 6);
12946 switch (cbMem)
12947 {
12948 case 1:
12949 Assert( pfnFunction
12950 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
12951 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
12952 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
12953 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
12954 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
12955 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
12956 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
12957 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
12958 : UINT64_C(0xc000b000a0009000) ));
12959 break;
12960 case 2:
12961 Assert( pfnFunction
12962 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
12963 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
12964 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
12965 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
12966 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
12967 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
12968 : UINT64_C(0xc000b000a0009000) ));
12969 break;
12970 case 4:
12971 Assert( pfnFunction
12972 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
12973 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
12974 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
12975 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
12976 : UINT64_C(0xc000b000a0009000) ));
12977 break;
12978 case 8:
12979 Assert( pfnFunction
12980 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
12981 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
12982 : UINT64_C(0xc000b000a0009000) ));
12983 break;
12984 }
12985 }
12986#endif
12987
12988#ifdef VBOX_STRICT
12989 /*
12990 * Check that the fExec flags we've got make sense.
12991 */
12992 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
12993#endif
12994
12995 /*
12996 * To keep things simple we have to commit any pending writes first as we
12997 * may end up making calls.
12998 */
12999 /** @todo we could postpone this till we make the call and reload the
13000 * registers after returning from the call. Not sure if that's sensible or
13001 * not, though. */
13002#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13003 off = iemNativeRegFlushPendingWrites(pReNative, off);
13004#else
13005 /* The program counter is treated differently for now. */
13006 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
13007#endif
13008
13009#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13010 /*
13011 * Move/spill/flush stuff out of call-volatile registers.
13012 * This is the easy way out. We could contain this to the tlb-miss branch
13013 * by saving and restoring active stuff here.
13014 */
13015 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13016#endif
13017
13018 /*
13019 * Define labels and allocate the result register (trying for the return
13020 * register if we can).
13021 */
13022 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13023 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
13024 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13025 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
13026 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
13027 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
13028 uint8_t const idxRegValueStore = !TlbState.fSkip
13029 && enmOp == kIemNativeEmitMemOp_Store
13030 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13031 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
13032 : UINT8_MAX;
13033 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13034 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13035 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13036 : UINT32_MAX;
13037
13038 /*
13039 * Jump to the TLB lookup code.
13040 */
13041 if (!TlbState.fSkip)
13042 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
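    /* Note: the actual TLB lookup code is emitted further down (see the IEMNATIVE_WITH_TLB_LOOKUP
       block near the end of this function); the TlbMiss code below is emitted inline here and
       both paths rejoin at the TlbDone label. */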
13043
13044 /*
13045 * TlbMiss:
13046 *
13047 * Call helper to do the fetching.
13048 * We flush all guest register shadow copies here.
13049 */
13050 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13051
13052#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13053 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13054#else
13055 RT_NOREF(idxInstr);
13056#endif
13057
13058#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13059 if (pReNative->Core.offPc)
13060 {
13061 /*
13062 * Update the program counter but restore it at the end of the TlbMiss branch.
13063 * This should allow delaying more program counter updates for the TlbLookup and hit paths
13064         * which are hopefully much more frequent, reducing the number of memory accesses.
13065 */
13066 /* Allocate a temporary PC register. */
13067 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13068
13069 /* Perform the addition and store the result. */
13070 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13071 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13072
13073 /* Free and flush the PC register. */
13074 iemNativeRegFreeTmp(pReNative, idxPcReg);
13075 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13076 }
13077#endif
13078
13079#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13080 /* Save variables in volatile registers. */
13081 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13082 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
13083 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
13084 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13085#endif
13086
13087 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
13088 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
13089 if (enmOp == kIemNativeEmitMemOp_Store)
13090 {
13091 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
13092 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
13093#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13094 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13095#else
13096 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13097 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
13098#endif
13099 }
13100
13101 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
13102 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
13103#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13104 fVolGregMask);
13105#else
13106 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
13107#endif
13108
13109 if (iSegReg != UINT8_MAX)
13110 {
13111 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
13112 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13113 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
13114 }
13115
13116 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13117 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13118
13119 /* Done setting up parameters, make the call. */
13120 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13121
13122 /*
13123 * Put the result in the right register if this is a fetch.
13124 */
13125 if (enmOp != kIemNativeEmitMemOp_Store)
13126 {
13127 Assert(idxRegValueFetch == pVarValue->idxReg);
13128 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
13129 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
13130 }
13131
13132#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13133 /* Restore variables and guest shadow registers to volatile registers. */
13134 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13135 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13136#endif
13137
13138#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13139 if (pReNative->Core.offPc)
13140 {
13141 /*
13142 * Time to restore the program counter to its original value.
13143 */
13144 /* Allocate a temporary PC register. */
13145 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13146
13147 /* Restore the original value. */
13148 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13149 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13150
13151 /* Free and flush the PC register. */
13152 iemNativeRegFreeTmp(pReNative, idxPcReg);
13153 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13154 }
13155#endif
13156
13157#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13158 if (!TlbState.fSkip)
13159 {
13160 /* end of TlbMiss - Jump to the done label. */
13161 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13162 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13163
13164 /*
13165 * TlbLookup:
13166 */
13167 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
13168 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
13169 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
13170
13171 /*
13172 * Emit code to do the actual storing / fetching.
13173 */
13174 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13175# ifdef VBOX_WITH_STATISTICS
13176 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13177 enmOp == kIemNativeEmitMemOp_Store
13178                                                   ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
13179                                                   : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
13180# endif
13181 switch (enmOp)
13182 {
13183 case kIemNativeEmitMemOp_Store:
13184 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
13185 {
13186 switch (cbMem)
13187 {
13188 case 1:
13189 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13190 break;
13191 case 2:
13192 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13193 break;
13194 case 4:
13195 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13196 break;
13197 case 8:
13198 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13199 break;
13200 default:
13201 AssertFailed();
13202 }
13203 }
13204 else
13205 {
13206 switch (cbMem)
13207 {
13208 case 1:
13209 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
13210 idxRegMemResult, TlbState.idxReg1);
13211 break;
13212 case 2:
13213 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
13214 idxRegMemResult, TlbState.idxReg1);
13215 break;
13216 case 4:
13217 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
13218 idxRegMemResult, TlbState.idxReg1);
13219 break;
13220 case 8:
13221 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
13222 idxRegMemResult, TlbState.idxReg1);
13223 break;
13224 default:
13225 AssertFailed();
13226 }
13227 }
13228 break;
13229
13230 case kIemNativeEmitMemOp_Fetch:
13231 case kIemNativeEmitMemOp_Fetch_Zx_U16:
13232 case kIemNativeEmitMemOp_Fetch_Zx_U32:
13233 case kIemNativeEmitMemOp_Fetch_Zx_U64:
13234 switch (cbMem)
13235 {
13236 case 1:
13237 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13238 break;
13239 case 2:
13240 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13241 break;
13242 case 4:
13243 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13244 break;
13245 case 8:
13246 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13247 break;
13248 default:
13249 AssertFailed();
13250 }
13251 break;
13252
13253 case kIemNativeEmitMemOp_Fetch_Sx_U16:
13254 Assert(cbMem == 1);
13255 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13256 break;
13257
13258 case kIemNativeEmitMemOp_Fetch_Sx_U32:
13259 Assert(cbMem == 1 || cbMem == 2);
13260 if (cbMem == 1)
13261 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13262 else
13263 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13264 break;
13265
13266 case kIemNativeEmitMemOp_Fetch_Sx_U64:
13267 switch (cbMem)
13268 {
13269 case 1:
13270 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13271 break;
13272 case 2:
13273 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13274 break;
13275 case 4:
13276 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13277 break;
13278 default:
13279 AssertFailed();
13280 }
13281 break;
13282
13283 default:
13284 AssertFailed();
13285 }
13286
13287 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13288
13289 /*
13290 * TlbDone:
13291 */
13292 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13293
13294 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13295
13296# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13297 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13298 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13299# endif
13300 }
13301#else
13302 RT_NOREF(fAlignMask, idxLabelTlbMiss);
13303#endif
13304
13305 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
13306 iemNativeVarRegisterRelease(pReNative, idxVarValue);
13307 return off;
13308}
13309
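/*
 * Usage note (taken from the wrapper macros below): e.g. IEM_MC_FETCH_MEM_U16_SX_U64 invokes
 * iemNativeEmitMemFetchStoreDataCommon with cbMem = sizeof(uint16_t), fAlignMask = sizeof(uint16_t) - 1,
 * enmOp = kIemNativeEmitMemOp_Fetch_Sx_U64 and iemNativeHlpMemFetchDataU16_Sx_U64 as the TLB-miss helper.
 */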
13310
13311
13312/*********************************************************************************************************************************
13313* Memory fetches (IEM_MEM_FETCH_XXX). *
13314*********************************************************************************************************************************/
13315
13316/* 8-bit segmented: */
13317#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
13318 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
13319 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13320 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13321
13322#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13323 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13324 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13325 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13326
13327#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13328 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13329 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13330 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13331
13332#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13333 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13334 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13335 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13336
13337#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13338 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13339 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13340 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13341
13342#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13343 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13344 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13345 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13346
13347#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13348 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13349 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13350 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13351
13352/* 16-bit segmented: */
13353#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13354 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13355 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13356 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13357
13358#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13359 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13360 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13361 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13362
13363#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13364 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13365 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13366 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13367
13368#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13369 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13370 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13371 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13372
13373#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13374 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13375 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13376 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13377
13378#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13379 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13380 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13381 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13382
13383
13384/* 32-bit segmented: */
13385#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13386 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13387 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13388 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13389
13390#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13391 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13392 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13393 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13394
13395#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13396 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13397 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13398 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13399
13400#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13401 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13402 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13403 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13404
13405
13406/* 64-bit segmented: */
13407#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13408 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13409 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13410 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
13411
13412
13413
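/* The flat variants below pass UINT8_MAX instead of a segment register index and use the
   iemNativeHlpMemFlatFetchDataXxx helpers; apart from that they are identical to the
   segmented forms above. */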
13414/* 8-bit flat: */
13415#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
13416 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
13417 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13418 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13419
13420#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
13421 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13422 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13423 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13424
13425#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
13426 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13427 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13428 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13429
13430#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
13431 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13432 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13433 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13434
13435#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
13436 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13437 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13438 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13439
13440#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
13441 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13442 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13443 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13444
13445#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
13446 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13447 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13448 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13449
13450
13451/* 16-bit flat: */
13452#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
13453 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13454 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13455 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13456
13457#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
13458 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13459 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13460 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13461
13462#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
13463 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13464 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13465 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13466
13467#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
13468 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13469 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13470 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13471
13472#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
13473 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13474 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13475 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13476
13477#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
13478 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13479 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13480 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13481
13482/* 32-bit flat: */
13483#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
13484 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13485 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13486 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13487
13488#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
13489 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13490 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13491 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13492
13493#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
13494 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13495 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13496 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13497
13498#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
13499 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13500 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13501 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13502
13503/* 64-bit flat: */
13504#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
13505 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13506 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13507 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
13508
13509
13510
13511/*********************************************************************************************************************************
13512* Memory stores (IEM_MEM_STORE_XXX). *
13513*********************************************************************************************************************************/
13514
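/* These use the same common emitter as the fetches above, just with
   kIemNativeEmitMemOp_Store, the value variable in place of a destination, and the
   iemNativeHlpMemStoreDataXxx / iemNativeHlpMemFlatStoreDataXxx helpers. */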
13515#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
13516 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
13517 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13518 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13519
13520#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
13521 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
13522 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13523 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13524
13525#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
13526 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
13527 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13528 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13529
13530#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
13531 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
13532 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13533 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13534
13535
13536#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
13537 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
13538 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13539 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13540
13541#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
13542 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
13543 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13544 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13545
13546#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
13547 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
13548 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13549 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13550
13551#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
13552 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
13553 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13554 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13555
13556
13557#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
13558 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13559 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13560
13561#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
13562 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13563 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13564
13565#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
13566 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13567 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13568
13569#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
13570 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13571 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13572
13573
13574#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
13575 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13576 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13577
13578#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
13579 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13580 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13581
13582#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
13583 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13584 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13585
13586#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
13587 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13588 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13589
13590/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
13591 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
13592DECL_INLINE_THROW(uint32_t)
13593iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
13594 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
13595{
13596 /*
13597 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
13598 * to do the grunt work.
13599 */
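    /* E.g. IEM_MC_STORE_MEM_U32_CONST lands here with cbMem = sizeof(uint32_t), so the
       store below goes through the same 4-byte aligned kIemNativeEmitMemOp_Store path and
       iemNativeHlpMemStoreDataU32 helper as a regular IEM_MC_STORE_MEM_U32, only with a
       temporary immediate variable supplying the value. */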
13600 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
13601 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
13602 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
13603 pfnFunction, idxInstr);
13604 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
13605 return off;
13606}
13607
13608
13609
13610/*********************************************************************************************************************************
13611* Stack Accesses. *
13612*********************************************************************************************************************************/
13613/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
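/* The stack push/pop emitters unpack this parameter with RT_BYTE1/2/3:
 *      cbMem     = RT_BYTE1(cBitsVarAndFlat) / 8;    - operand size in bytes
 *      cBitsFlat = RT_BYTE2(cBitsVarAndFlat);        - flat SP width, 0 = segmented (SS based)
 *      fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;   - pushing a segment register
 * So RT_MAKE_U32_FROM_U8(16, 64, 0, 0) selects a 16-bit push on a flat 64-bit stack. */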
13614#define IEM_MC_PUSH_U16(a_u16Value) \
13615 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
13616 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
13617#define IEM_MC_PUSH_U32(a_u32Value) \
13618 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
13619 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
13620#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
13621 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
13622 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
13623#define IEM_MC_PUSH_U64(a_u64Value) \
13624 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
13625 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
13626
13627#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
13628 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
13629 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13630#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
13631 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
13632 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
13633#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
13634 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
13635 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
13636
13637#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
13638 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
13639 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13640#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
13641 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
13642 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
13643
13644
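/** Emits the 16-bit SP variant of the push stack pointer update: SP (bits 15:0 of
 *  @a idxRegRsp) is decremented by @a cbMem with 16-bit wrap-around, leaving the upper
 *  RSP bits untouched, and the zero-extended result is placed in @a idxRegEffSp as the
 *  effective store address. */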
13645DECL_FORCE_INLINE_THROW(uint32_t)
13646iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13647{
13648 /* Use16BitSp: */
13649#ifdef RT_ARCH_AMD64
13650 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
13651 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13652#else
13653 /* sub regeff, regrsp, #cbMem */
13654 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
13655 /* and regeff, regeff, #0xffff */
13656 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
13657 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
13658 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
13659 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
13660#endif
13661 return off;
13662}
13663
13664
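/** Emits the 32-bit SP variant of the push stack pointer update: ESP is decremented by
 *  @a cbMem and then copied (zero-extended) into @a idxRegEffSp as the effective store
 *  address. */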
13665DECL_FORCE_INLINE(uint32_t)
13666iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13667{
13668 /* Use32BitSp: */
13669 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
13670 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13671 return off;
13672}
13673
13674
13675/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
13676DECL_INLINE_THROW(uint32_t)
13677iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
13678 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
13679{
13680 /*
13681 * Assert sanity.
13682 */
13683 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
13684 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
13685#ifdef VBOX_STRICT
13686 if (RT_BYTE2(cBitsVarAndFlat) != 0)
13687 {
13688 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13689 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13690 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13691 Assert( pfnFunction
13692 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13693 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
13694 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
13695 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13696 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
13697 : UINT64_C(0xc000b000a0009000) ));
13698 }
13699 else
13700 Assert( pfnFunction
13701 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
13702 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
13703 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
13704 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
13705 : UINT64_C(0xc000b000a0009000) ));
13706#endif
13707
13708#ifdef VBOX_STRICT
13709 /*
13710 * Check that the fExec flags we've got make sense.
13711 */
13712 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13713#endif
13714
13715 /*
13716 * To keep things simple we have to commit any pending writes first as we
13717 * may end up making calls.
13718 */
13719 /** @todo we could postpone this till we make the call and reload the
13720 * registers after returning from the call. Not sure if that's sensible or
13721 * not, though. */
13722 off = iemNativeRegFlushPendingWrites(pReNative, off);
13723
13724 /*
13725 * First we calculate the new RSP and the effective stack pointer value.
13726 * For 64-bit mode and flat 32-bit these two are the same.
13727 * (Code structure is very similar to that of PUSH)
13728 */
13729 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
13730 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
13731 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
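    /* Recent Intel CPUs only write the 16-bit selector when pushing a segment register
       with a 32-bit operand size, so unless we're in 16-bit mode the access is narrowed
       to a word here; in 16-bit mode the full width is kept so the TlbLookup code below
       can apply the real-mode quirk of filling the upper word with the high half of
       EFLAGS. */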
13732 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
13733 ? cbMem : sizeof(uint16_t);
13734 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
13735 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
13736 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
13737 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
13738 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
13739 if (cBitsFlat != 0)
13740 {
13741 Assert(idxRegEffSp == idxRegRsp);
13742 Assert(cBitsFlat == 32 || cBitsFlat == 64);
13743 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
13744 if (cBitsFlat == 64)
13745 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
13746 else
13747 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
13748 }
13749 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
13750 {
13751 Assert(idxRegEffSp != idxRegRsp);
13752 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
13753 kIemNativeGstRegUse_ReadOnly);
13754#ifdef RT_ARCH_AMD64
13755 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13756#else
13757 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13758#endif
13759 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
13760 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
13761 offFixupJumpToUseOtherBitSp = off;
13762 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13763 {
13764 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
13765 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13766 }
13767 else
13768 {
13769 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
13770 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13771 }
13772 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13773 }
13774 /* SpUpdateEnd: */
13775 uint32_t const offLabelSpUpdateEnd = off;
13776
13777 /*
13778 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
13779 * we're skipping lookup).
13780 */
13781 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
13782 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
13783 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13784 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
13785 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13786 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13787 : UINT32_MAX;
13788 uint8_t const idxRegValue = !TlbState.fSkip
13789 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13790 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
13791 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
13792 : UINT8_MAX;
13793 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13794
13795
13796 if (!TlbState.fSkip)
13797 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13798 else
13799 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
13800
13801 /*
13802 * Use16BitSp:
13803 */
13804 if (cBitsFlat == 0)
13805 {
13806#ifdef RT_ARCH_AMD64
13807 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13808#else
13809 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13810#endif
13811 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
13812 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13813 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13814 else
13815 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13816 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
13817 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13818 }
13819
13820 /*
13821 * TlbMiss:
13822 *
13823 * Call helper to do the pushing.
13824 */
13825 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
13826
13827#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13828 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13829#else
13830 RT_NOREF(idxInstr);
13831#endif
13832
13833 /* Save variables in volatile registers. */
13834 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13835 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
13836 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
13837 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
13838 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13839
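    /* Load the helper arguments: pVCpu in ARG0, the effective stack address in ARG1 and
       the value to push in ARG2.  The ordering below avoids clobbering a source that still
       lives in one of the destination registers; the worst case is the value sitting in
       ARG1 while the stack pointer sits in ARG2, which needs the three-way swap via ARG0. */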
13840 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
13841 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
13842 {
13843 /* Swap them using ARG0 as temp register: */
13844 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
13845 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
13846 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
13847 }
13848 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
13849 {
13850 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
13851 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
13852 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13853
13854 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
13855 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
13856 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13857 }
13858 else
13859 {
13860 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
13861 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13862
13863 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
13864 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
13865 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
13866 }
13867
13868 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13869 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13870
13871 /* Done setting up parameters, make the call. */
13872 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13873
13874 /* Restore variables and guest shadow registers to volatile registers. */
13875 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13876 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13877
13878#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13879 if (!TlbState.fSkip)
13880 {
13881 /* end of TlbMiss - Jump to the done label. */
13882 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13883 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13884
13885 /*
13886 * TlbLookup:
13887 */
13888 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
13889 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13890
13891 /*
13892 * Emit code to do the actual storing.
13893 */
13894 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13895# ifdef VBOX_WITH_STATISTICS
13896 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13897 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
13898# endif
13899 if (idxRegValue != UINT8_MAX)
13900 {
13901 switch (cbMemAccess)
13902 {
13903 case 2:
13904 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13905 break;
13906 case 4:
13907 if (!fIsIntelSeg)
13908 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13909 else
13910 {
13911 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
13912 PUSH FS in real mode, so we have to try to emulate that here.
13913 We borrow the now unused idxReg1 from the TLB lookup code here. */
13914 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
13915 kIemNativeGstReg_EFlags);
13916 if (idxRegEfl != UINT8_MAX)
13917 {
13918#ifdef RT_ARCH_AMD64
13919 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
13920 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
13921 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13922#else
13923 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
13924 off, TlbState.idxReg1, idxRegEfl,
13925 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13926#endif
13927 iemNativeRegFreeTmp(pReNative, idxRegEfl);
13928 }
13929 else
13930 {
13931 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
13932 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
13933 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
13934 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13935 }
13936 /* ASSUMES the upper half of idxRegValue is ZERO. */
13937 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
13938 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
13939 }
13940 break;
13941 case 8:
13942 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13943 break;
13944 default:
13945 AssertFailed();
13946 }
13947 }
13948 else
13949 {
13950 switch (cbMemAccess)
13951 {
13952 case 2:
13953 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
13954 idxRegMemResult, TlbState.idxReg1);
13955 break;
13956 case 4:
13957 Assert(!fIsSegReg);
13958 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
13959 idxRegMemResult, TlbState.idxReg1);
13960 break;
13961 case 8:
13962 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
13963 break;
13964 default:
13965 AssertFailed();
13966 }
13967 }
13968
13969 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13970 TlbState.freeRegsAndReleaseVars(pReNative);
13971
13972 /*
13973 * TlbDone:
13974 *
13975 * Commit the new RSP value.
13976 */
13977 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13978 }
13979#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
13980
13981 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
13982 iemNativeRegFreeTmp(pReNative, idxRegRsp);
13983 if (idxRegEffSp != idxRegRsp)
13984 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
13985
13986 /* The value variable is implicitly flushed. */
13987 if (idxRegValue != UINT8_MAX)
13988 iemNativeVarRegisterRelease(pReNative, idxVarValue);
13989 iemNativeVarFreeLocal(pReNative, idxVarValue);
13990
13991 return off;
13992}
13993
13994
13995
13996/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
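/* Same packing as for the push macros above: byte 0 is the operand width in bits and
   byte 1 the flat stack pointer width (zero for segmented, SS based pops); there is no
   segment register flag here. */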
13997#define IEM_MC_POP_GREG_U16(a_iGReg) \
13998 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
13999 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
14000#define IEM_MC_POP_GREG_U32(a_iGReg) \
14001 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
14002 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
14003#define IEM_MC_POP_GREG_U64(a_iGReg) \
14004 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
14005 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
14006
14007#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
14008 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
14009 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14010#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
14011 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
14012 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
14013
14014#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
14015 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
14016 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14017#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
14018 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
14019 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
14020
14021
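/** Emits the 16-bit SP variant of the pop stack pointer update: the zero-extended current
 *  SP is placed in @a idxRegEffSp as the effective load address, and SP (bits 15:0 of
 *  @a idxRegRsp) is then incremented by @a cbMem with 16-bit wrap-around.  @a idxRegTmp is
 *  only needed as scratch on ARM64. */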
14022DECL_FORCE_INLINE_THROW(uint32_t)
14023iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
14024 uint8_t idxRegTmp)
14025{
14026 /* Use16BitSp: */
14027#ifdef RT_ARCH_AMD64
14028 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14029 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
14030 RT_NOREF(idxRegTmp);
14031#else
14032 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
14033 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
14034 /* add tmp, regrsp, #cbMem */
14035 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
14036 /* and tmp, tmp, #0xffff */
14037 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
14038 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
14039 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
14040 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
14041#endif
14042 return off;
14043}
14044
14045
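/** Emits the 32-bit SP variant of the pop stack pointer update: ESP is copied
 *  (zero-extended) into @a idxRegEffSp as the effective load address and then incremented
 *  by @a cbMem. */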
14046DECL_FORCE_INLINE(uint32_t)
14047iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
14048{
14049 /* Use32BitSp: */
14050 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14051 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
14052 return off;
14053}
14054
14055
14056/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
14057DECL_INLINE_THROW(uint32_t)
14058iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
14059 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
14060{
14061 /*
14062 * Assert sanity.
14063 */
14064 Assert(idxGReg < 16);
14065#ifdef VBOX_STRICT
14066 if (RT_BYTE2(cBitsVarAndFlat) != 0)
14067 {
14068 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14069 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14070 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14071 Assert( pfnFunction
14072 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14073 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
14074 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14075 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
14076 : UINT64_C(0xc000b000a0009000) ));
14077 }
14078 else
14079 Assert( pfnFunction
14080 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
14081 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
14082 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
14083 : UINT64_C(0xc000b000a0009000) ));
14084#endif
14085
14086#ifdef VBOX_STRICT
14087 /*
14088 * Check that the fExec flags we've got make sense.
14089 */
14090 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14091#endif
14092
14093 /*
14094 * To keep things simple we have to commit any pending writes first as we
14095 * may end up making calls.
14096 */
14097 off = iemNativeRegFlushPendingWrites(pReNative, off);
14098
14099 /*
14100 * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
14101 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
14102 * directly as the effective stack pointer.
14103 * (Code structure is very similar to that of PUSH)
14104 */
14105 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
14106 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
14107 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
14108 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
14109 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
14110 /** @todo can do a better job picking the register here. For cbMem >= 4 this
14111 * will be the resulting register value. */
14112 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
14113
14114 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
14115 if (cBitsFlat != 0)
14116 {
14117 Assert(idxRegEffSp == idxRegRsp);
14118 Assert(cBitsFlat == 32 || cBitsFlat == 64);
14119 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
14120 }
14121 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
14122 {
14123 Assert(idxRegEffSp != idxRegRsp);
14124 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
14125 kIemNativeGstRegUse_ReadOnly);
14126#ifdef RT_ARCH_AMD64
14127 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14128#else
14129 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14130#endif
14131 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
14132 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
14133 offFixupJumpToUseOtherBitSp = off;
14134 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14135 {
14136/** @todo can skip idxRegRsp updating when popping ESP. */
14137 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
14138 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14139 }
14140 else
14141 {
14142 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
14143 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14144 }
14145 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14146 }
14147 /* SpUpdateEnd: */
14148 uint32_t const offLabelSpUpdateEnd = off;
14149
14150 /*
14151 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
14152 * we're skipping lookup).
14153 */
14154 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
14155 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
14156 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14157 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
14158 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14159 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14160 : UINT32_MAX;
14161
14162 if (!TlbState.fSkip)
14163 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14164 else
14165 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
14166
14167 /*
14168 * Use16BitSp:
14169 */
14170 if (cBitsFlat == 0)
14171 {
14172#ifdef RT_ARCH_AMD64
14173 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14174#else
14175 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14176#endif
14177 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
14178 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14179 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14180 else
14181 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14182 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
14183 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14184 }
14185
14186 /*
14187 * TlbMiss:
14188 *
14189 * Call helper to do the popping.
14190 */
14191 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
14192
14193#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14194 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14195#else
14196 RT_NOREF(idxInstr);
14197#endif
14198
14199 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
14200 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
14201 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
14202 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14203
14204
14205 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
14206 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
14207 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
14208
14209 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14210 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14211
14212 /* Done setting up parameters, make the call. */
14213 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14214
14215 /* Move the return register content to idxRegMemResult. */
14216 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14217 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14218
14219 /* Restore variables and guest shadow registers to volatile registers. */
14220 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14221 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14222
14223#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14224 if (!TlbState.fSkip)
14225 {
14226 /* end of TlbMiss - Jump to the done label. */
14227 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14228 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14229
14230 /*
14231 * TlbLookup:
14232 */
14233 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
14234 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14235
14236 /*
14237 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
14238 */
14239 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14240# ifdef VBOX_WITH_STATISTICS
14241 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
14242 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
14243# endif
14244 switch (cbMem)
14245 {
14246 case 2:
14247 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14248 break;
14249 case 4:
14250 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14251 break;
14252 case 8:
14253 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14254 break;
14255 default:
14256 AssertFailed();
14257 }
14258
14259 TlbState.freeRegsAndReleaseVars(pReNative);
14260
14261 /*
14262 * TlbDone:
14263 *
14264 * Set the new RSP value (FLAT accesses need to calculate it first) and
14265 * commit the popped register value.
14266 */
14267 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14268 }
14269#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14270
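    /* Commit the popped value: 32-bit and 64-bit pops replace the whole register (and take
       over its guest shadow), while 16-bit pops are merged into the low word of the current
       value.  Popping into xSP itself is handled separately below since the loaded value
       replaces all (or the low word) of the stack pointer instead of being combined with
       the normal RSP update. */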
14271 if (idxGReg != X86_GREG_xSP)
14272 {
14273 /* Set the register. */
14274 if (cbMem >= sizeof(uint32_t))
14275 {
14276#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14277 AssertMsg( pReNative->idxCurCall == 0
14278 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
14279 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
14280#endif
14281 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
14282 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
14283 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14284 }
14285 else
14286 {
14287 Assert(cbMem == sizeof(uint16_t));
14288 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
14289 kIemNativeGstRegUse_ForUpdate);
14290 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
14291 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14292 iemNativeRegFreeTmp(pReNative, idxRegDst);
14293 }
14294
14295 /* Complete RSP calculation for FLAT mode. */
14296 if (idxRegEffSp == idxRegRsp)
14297 {
14298 if (cBitsFlat == 64)
14299 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14300 else
14301 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14302 }
14303 }
14304 else
14305 {
14306 /* We're popping RSP, ESP or SP. Only the last one (SP) is a bit of extra work, of course. */
14307 if (cbMem == sizeof(uint64_t))
14308 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
14309 else if (cbMem == sizeof(uint32_t))
14310 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
14311 else
14312 {
14313 if (idxRegEffSp == idxRegRsp)
14314 {
14315 if (cBitsFlat == 64)
14316 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14317 else
14318 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14319 }
14320 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
14321 }
14322 }
14323 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
14324
14325 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14326 if (idxRegEffSp != idxRegRsp)
14327 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14328 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14329
14330 return off;
14331}
14332
14333
14334
14335/*********************************************************************************************************************************
14336* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
14337*********************************************************************************************************************************/
14338
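/* All of these expand to a single iemNativeEmitMemMapCommon call; the variants differ only
   in the mapped pointer / unmap-info variables, the access mode (ATOMIC/RW/WO/RO), the
   alignment mask and the iemNativeHlpMemMapDataXxx / iemNativeHlpMemFlatMapDataXxx helper
   (the flat forms pass UINT8_MAX instead of a segment register index). */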
14339#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14340 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14341 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14342 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
14343
14344#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14345 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14346 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14347 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
14348
14349#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14350 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14351 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14352 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
14353
14354#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14355 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14356 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14357 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
14358
14359
14360#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14361 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14362 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14363 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
14364
14365#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14366 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14367 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14368 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
14369
14370#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14371 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14372 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14373 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14374
14375#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14376 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14377 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14378 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
14379
14380#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14381 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
14382 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14383 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14384
14385
14386#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14387 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14388 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14389 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
14390
14391#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14392 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14393 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14394 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
14395
14396#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14397 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14398 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14399 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14400
14401#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14402 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14403 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14404 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
14405
14406#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14407 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
14408 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14409 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14410
14411
14412#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14413 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14414 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14415 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
14416
14417#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14418 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14419 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14420 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
14421#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14422 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14423 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14424 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14425
14426#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14427 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14428 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14429 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
14430
14431#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14432 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
14433 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14434 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14435
14436
14437#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14438 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14439 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14440 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
14441
14442#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14443 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14444 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14445 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
14446
14447
14448#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14449 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14450 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14451 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
14452
14453#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14454 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14455 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14456 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
14457
14458#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14459 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14460 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14461 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
14462
14463#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14464 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14465 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14466 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
14467
14468
14469
14470#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14471 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14472 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14473 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
14474
14475#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14476 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14477 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14478 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
14479
14480#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14481 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14482 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14483 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
14484
14485#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14486 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14487 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14488 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
14489
14490
14491#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14492 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14493 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14494 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
14495
14496#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14497 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14498 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14499 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
14500
14501#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14502 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14503 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14504 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14505
14506#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14507 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14508 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14509 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
14510
14511#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
14512 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
14513 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14514 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14515
14516
14517#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14518 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14519 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14520 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
14521
14522#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14523 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14524 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14525 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
14526
14527#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14528 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14529 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14530 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14531
14532#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14533 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14534 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14535 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
14536
14537#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
14538 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
14539 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14540 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14541
14542
14543#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14544 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14545 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14546 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
14547
14548#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14549 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14550 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14551 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
14552
14553#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14554 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14555 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14556 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14557
14558#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14559 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14560 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14561 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
14562
14563#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
14564 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
14565 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14566 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14567
14568
14569#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
14570 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14571 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14572 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
14573
14574#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
14575 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14576 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14577 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
14578
14579
14580#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14581 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14582 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14583 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
14584
14585#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14586 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14587 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14588 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
14589
14590#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14591 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14592 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14593 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
14594
14595#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14596 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14597 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14598 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
14599
14600
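/**
 * Common emitter for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * statements above.
 *
 * Emits an inline TLB lookup for the guest address and falls back to the
 * given mapping helper (pfnFunction) on a TLB miss.  The host address of the
 * mapping ends up in the a_pXxxMem variable (idxVarMem) and the unmap token
 * in the a_bUnmapInfo variable (idxVarUnmapInfo), which is later consumed by
 * one of the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statements further down.
 */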
14601DECL_INLINE_THROW(uint32_t)
14602iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
14603 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
14604 uintptr_t pfnFunction, uint8_t idxInstr)
14605{
14606 /*
14607 * Assert sanity.
14608 */
14609 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
14610 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
14611 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
14612 && pVarMem->cbVar == sizeof(void *),
14613 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14614
14615 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14616 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14617 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
14618 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
14619 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14620
14621 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
14622 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
14623 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
14624 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
14625 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14626
14627 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
14628
14629 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
14630
14631#ifdef VBOX_STRICT
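    /* Strict builds only: derive the expected helper from the access flags and
       access size so we can cross-check the pfnFunction passed in by the MC
       macros above. */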
14632# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
14633 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
14634 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
14635 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
14636 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
14637# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
14638 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
14639 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
14640 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
14641
14642 if (iSegReg == UINT8_MAX)
14643 {
14644 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14645 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14646 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14647 switch (cbMem)
14648 {
14649 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
14650 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
14651 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
14652 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
14653 case 10:
14654 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
14655 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
14656 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14657 break;
14658 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
14659# if 0
14660 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
14661 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
14662# endif
14663 default: AssertFailed(); break;
14664 }
14665 }
14666 else
14667 {
14668 Assert(iSegReg < 6);
14669 switch (cbMem)
14670 {
14671 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
14672 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
14673 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
14674 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
14675 case 10:
14676 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
14677 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
14678 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14679 break;
14680 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
14681# if 0
14682 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
14683 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
14684# endif
14685 default: AssertFailed(); break;
14686 }
14687 }
14688# undef IEM_MAP_HLP_FN
14689# undef IEM_MAP_HLP_FN_NO_AT
14690#endif
14691
14692#ifdef VBOX_STRICT
14693 /*
14694 * Check that the fExec flags we've got make sense.
14695 */
14696 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14697#endif
14698
14699 /*
14700 * To keep things simple we have to commit any pending writes first as we
14701 * may end up making calls.
14702 */
14703 off = iemNativeRegFlushPendingWrites(pReNative, off);
14704
14705#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14706 /*
14707 * Move/spill/flush stuff out of call-volatile registers.
14708 * This is the easy way out. We could contain this to the tlb-miss branch
14709 * by saving and restoring active stuff here.
14710 */
14711 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
14712 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
14713#endif
14714
14715 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
14716 while the tlb-miss codepath will temporarily put it on the stack.
       Set the type to stack here so we don't need to do it twice below. */
14718 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
14719 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
14720 /** @todo use a tmp register from TlbState, since they'll be free after tlb
14721 * lookup is done. */
14722
14723 /*
14724 * Define labels and allocate the result register (trying for the return
14725 * register if we can).
14726 */
14727 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14728 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
14729 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
14730 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
14731 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
14732 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14733 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14734 : UINT32_MAX;
14735//off=iemNativeEmitBrk(pReNative, off, 0);
14736 /*
14737 * Jump to the TLB lookup code.
14738 */
14739 if (!TlbState.fSkip)
14740 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14741
14742 /*
14743 * TlbMiss:
14744 *
     * Call helper to do the mapping.
14746 * We flush all guest register shadow copies here.
14747 */
14748 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
14749
14750#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14751 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14752#else
14753 RT_NOREF(idxInstr);
14754#endif
14755
14756#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14757 /* Save variables in volatile registers. */
14758 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
14759 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14760#endif
14761
14762 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
14763 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
14764#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14765 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
14766#else
14767 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14768#endif
14769
14770 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
14771 if (iSegReg != UINT8_MAX)
14772 {
14773 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
14774 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
14775 }
14776
14777 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
14778 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
14779 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
14780
14781 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14782 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14783
14784 /* Done setting up parameters, make the call. */
14785 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14786
14787 /*
14788 * Put the output in the right registers.
14789 */
14790 Assert(idxRegMemResult == pVarMem->idxReg);
14791 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14792 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14793
14794#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14795 /* Restore variables and guest shadow registers to volatile registers. */
14796 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14797 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14798#endif
14799
14800 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
14801 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
14802
14803#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14804 if (!TlbState.fSkip)
14805 {
        /* End of TlbMiss - jump to the done label. */
14807 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14808 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14809
14810 /*
14811 * TlbLookup:
14812 */
14813 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
14814 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14815# ifdef VBOX_WITH_STATISTICS
14816 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
14817 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
14818# endif
14819
14820 /* [idxVarUnmapInfo] = 0; */
14821 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
14822
14823 /*
14824 * TlbDone:
14825 */
14826 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14827
14828 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
14829
14830# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14831 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
14832 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14833# endif
14834 }
14835#else
14836 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
14837#endif
14838
14839 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
14840 iemNativeVarRegisterRelease(pReNative, idxVarMem);
14841
14842 return off;
14843}
14844
14845
14846#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
14847 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
14848 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
14849
14850#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
14851 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
14852 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
14853
14854#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
14855 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
14856 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
14857
14858#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
14859 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
14860 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
14861
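/**
 * Common emitter for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statements above.
 *
 * Emits a check of the bUnmapInfo value and only calls the given
 * commit-and-unmap helper (pfnFunction) when it is non-zero; a zero value
 * means the mapping needs no special unmapping.
 */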
14862DECL_INLINE_THROW(uint32_t)
14863iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
14864 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
14865{
14866 /*
14867 * Assert sanity.
14868 */
14869 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14870#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
14871 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14872#endif
14873 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
14874 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
14875 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
14876#ifdef VBOX_STRICT
14877 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
14878 {
14879 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
14880 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
14881 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
14882 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
14883 case IEM_ACCESS_TYPE_WRITE:
14884 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
14885 case IEM_ACCESS_TYPE_READ:
14886 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
14887 default: AssertFailed();
14888 }
14889#else
14890 RT_NOREF(fAccess);
14891#endif
14892
14893 /*
14894 * To keep things simple we have to commit any pending writes first as we
14895 * may end up making calls (there shouldn't be any at this point, so this
14896 * is just for consistency).
14897 */
14898 /** @todo we could postpone this till we make the call and reload the
14899 * registers after returning from the call. Not sure if that's sensible or
14900 * not, though. */
14901 off = iemNativeRegFlushPendingWrites(pReNative, off);
14902
14903 /*
14904 * Move/spill/flush stuff out of call-volatile registers.
14905 *
14906 * We exclude any register holding the bUnmapInfo variable, as we'll be
14907 * checking it after returning from the call and will free it afterwards.
14908 */
14909 /** @todo save+restore active registers and maybe guest shadows in miss
14910 * scenario. */
14911 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
14912
14913 /*
14914 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
14915 * to call the unmap helper function.
14916 *
     * The likelihood of it being zero is higher than for the TLB hit when doing
     * the mapping, as a TLB miss for a well aligned and unproblematic memory
14919 * access should also end up with a mapping that won't need special unmapping.
14920 */
14921 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
14922 * should speed up things for the pure interpreter as well when TLBs
14923 * are enabled. */
14924#ifdef RT_ARCH_AMD64
14925 if (pVarUnmapInfo->idxReg == UINT8_MAX)
14926 {
14927 /* test byte [rbp - xxx], 0ffh */
14928 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
14929 pbCodeBuf[off++] = 0xf6;
14930 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
14931 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
14932 pbCodeBuf[off++] = 0xff;
14933 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14934 }
14935 else
14936#endif
14937 {
14938 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
14939 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
14940 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
14941 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
14942 }
14943 uint32_t const offJmpFixup = off;
14944 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
14945
14946 /*
14947 * Call the unmap helper function.
14948 */
14949#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
14950 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14951#else
14952 RT_NOREF(idxInstr);
14953#endif
14954
14955 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
14956 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
14957 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14958
14959 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14960 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14961
14962 /* Done setting up parameters, make the call. */
14963 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14964
    /* The bUnmapInfo variable is implicitly freed by these MCs. */
14966 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
14967
14968 /*
14969 * Done, just fixup the jump for the non-call case.
14970 */
14971 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
14972
14973 return off;
14974}
14975
14976
14977
14978/*********************************************************************************************************************************
14979* State and Exceptions *
14980*********************************************************************************************************************************/
14981
14982#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
14983#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
14984
14985#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
14986#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
14987#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
14988
14989#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
14990#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
14991#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
14992
14993
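/** Common emitter for the FPU/SSE/AVX state actualization and usage preparation statements above (currently a no-op stub, see the todo). */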
14994DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
14995{
14996 /** @todo this needs a lot more work later. */
14997 RT_NOREF(pReNative, fForChange);
14998 return off;
14999}
15000
15001
15002
15003/*********************************************************************************************************************************
15004* Emitters for FPU related operations. *
15005*********************************************************************************************************************************/
15006
15007#define IEM_MC_FETCH_FCW(a_u16Fcw) \
15008 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
15009
15010/** Emits code for IEM_MC_FETCH_FCW. */
15011DECL_INLINE_THROW(uint32_t)
15012iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15013{
15014 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15015 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15016
15017 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15018
15019 /* Allocate a temporary FCW register. */
15020 /** @todo eliminate extra register */
15021 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
15022 kIemNativeGstRegUse_ReadOnly);
15023
15024 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
15025
15026 /* Free but don't flush the FCW register. */
15027 iemNativeRegFreeTmp(pReNative, idxFcwReg);
15028 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15029
15030 return off;
15031}
15032
15033
15034#define IEM_MC_FETCH_FSW(a_u16Fsw) \
15035 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
15036
15037/** Emits code for IEM_MC_FETCH_FSW. */
15038DECL_INLINE_THROW(uint32_t)
15039iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15040{
15041 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15042 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15043
15044 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
15045 /* Allocate a temporary FSW register. */
15046 /** @todo eliminate extra register */
15047 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
15048 kIemNativeGstRegUse_ReadOnly);
15049
15050 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
15051
15052 /* Free but don't flush the FSW register. */
15053 iemNativeRegFreeTmp(pReNative, idxFswReg);
15054 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15055
15056 return off;
15057}
15058
15059
15060
15061#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15062
15063
15064/*********************************************************************************************************************************
15065* Emitters for SSE/AVX specific operations. *
15066*********************************************************************************************************************************/
15067
15068#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
15069 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
15070
/** Emits code for IEM_MC_COPY_XREG_U128. */
15072DECL_INLINE_THROW(uint32_t)
15073iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
15074{
15075 /* Allocate destination and source register. */
15076 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
15077 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
15078 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
15079 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15080
15081 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
15082 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
15083 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
15084
15085 /* Free but don't flush the source and destination register. */
15086 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
15087 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15088
15089 return off;
15090}
15091#endif
15092
15093
15094/*********************************************************************************************************************************
15095* The native code generator functions for each MC block. *
15096*********************************************************************************************************************************/
15097
15098/*
15099 * Include instruction emitters.
15100 */
15101#include "target-x86/IEMAllN8veEmit-x86.h"
15102
15103/*
15104 * Include g_apfnIemNativeRecompileFunctions and associated functions.
15105 *
 * This should probably live in its own file later, but let's see what the
15107 * compile times turn out to be first.
15108 */
15109#include "IEMNativeFunctions.cpp.h"
15110
15111
15112
15113/*********************************************************************************************************************************
15114* Recompiler Core. *
15115*********************************************************************************************************************************/
15116
15117
15118/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
15119static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
15120{
15121 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
15122 pDis->cbCachedInstr += cbMaxRead;
15123 RT_NOREF(cbMinRead);
15124 return VERR_NO_DATA;
15125}
15126
15127
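/**
 * Translates a VMCPU field offset into a member name for disassembly annotations.
 *
 * @returns The member name, NULL if the offset isn't recognized.
 * @param   off     The offset into VMCPUCC to look up.
 */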
15128DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
15129{
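    /* Table of interesting VMCPUCC members, sorted by offset for the binary lookup below. */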
15130 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
15131 {
15132#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
15133 ENTRY(fLocalForcedActions),
15134 ENTRY(iem.s.rcPassUp),
15135 ENTRY(iem.s.fExec),
15136 ENTRY(iem.s.pbInstrBuf),
15137 ENTRY(iem.s.uInstrBufPc),
15138 ENTRY(iem.s.GCPhysInstrBuf),
15139 ENTRY(iem.s.cbInstrBufTotal),
15140 ENTRY(iem.s.idxTbCurInstr),
15141#ifdef VBOX_WITH_STATISTICS
15142 ENTRY(iem.s.StatNativeTlbHitsForFetch),
15143 ENTRY(iem.s.StatNativeTlbHitsForStore),
15144 ENTRY(iem.s.StatNativeTlbHitsForStack),
15145 ENTRY(iem.s.StatNativeTlbHitsForMapped),
15146 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
15147 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
15148 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
15149 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
15150#endif
15151 ENTRY(iem.s.DataTlb.aEntries),
15152 ENTRY(iem.s.DataTlb.uTlbRevision),
15153 ENTRY(iem.s.DataTlb.uTlbPhysRev),
15154 ENTRY(iem.s.DataTlb.cTlbHits),
15155 ENTRY(iem.s.CodeTlb.aEntries),
15156 ENTRY(iem.s.CodeTlb.uTlbRevision),
15157 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
15158 ENTRY(iem.s.CodeTlb.cTlbHits),
15159 ENTRY(pVMR3),
15160 ENTRY(cpum.GstCtx.rax),
15161 ENTRY(cpum.GstCtx.ah),
15162 ENTRY(cpum.GstCtx.rcx),
15163 ENTRY(cpum.GstCtx.ch),
15164 ENTRY(cpum.GstCtx.rdx),
15165 ENTRY(cpum.GstCtx.dh),
15166 ENTRY(cpum.GstCtx.rbx),
15167 ENTRY(cpum.GstCtx.bh),
15168 ENTRY(cpum.GstCtx.rsp),
15169 ENTRY(cpum.GstCtx.rbp),
15170 ENTRY(cpum.GstCtx.rsi),
15171 ENTRY(cpum.GstCtx.rdi),
15172 ENTRY(cpum.GstCtx.r8),
15173 ENTRY(cpum.GstCtx.r9),
15174 ENTRY(cpum.GstCtx.r10),
15175 ENTRY(cpum.GstCtx.r11),
15176 ENTRY(cpum.GstCtx.r12),
15177 ENTRY(cpum.GstCtx.r13),
15178 ENTRY(cpum.GstCtx.r14),
15179 ENTRY(cpum.GstCtx.r15),
15180 ENTRY(cpum.GstCtx.es.Sel),
15181 ENTRY(cpum.GstCtx.es.u64Base),
15182 ENTRY(cpum.GstCtx.es.u32Limit),
15183 ENTRY(cpum.GstCtx.es.Attr),
15184 ENTRY(cpum.GstCtx.cs.Sel),
15185 ENTRY(cpum.GstCtx.cs.u64Base),
15186 ENTRY(cpum.GstCtx.cs.u32Limit),
15187 ENTRY(cpum.GstCtx.cs.Attr),
15188 ENTRY(cpum.GstCtx.ss.Sel),
15189 ENTRY(cpum.GstCtx.ss.u64Base),
15190 ENTRY(cpum.GstCtx.ss.u32Limit),
15191 ENTRY(cpum.GstCtx.ss.Attr),
15192 ENTRY(cpum.GstCtx.ds.Sel),
15193 ENTRY(cpum.GstCtx.ds.u64Base),
15194 ENTRY(cpum.GstCtx.ds.u32Limit),
15195 ENTRY(cpum.GstCtx.ds.Attr),
15196 ENTRY(cpum.GstCtx.fs.Sel),
15197 ENTRY(cpum.GstCtx.fs.u64Base),
15198 ENTRY(cpum.GstCtx.fs.u32Limit),
15199 ENTRY(cpum.GstCtx.fs.Attr),
15200 ENTRY(cpum.GstCtx.gs.Sel),
15201 ENTRY(cpum.GstCtx.gs.u64Base),
15202 ENTRY(cpum.GstCtx.gs.u32Limit),
15203 ENTRY(cpum.GstCtx.gs.Attr),
15204 ENTRY(cpum.GstCtx.rip),
15205 ENTRY(cpum.GstCtx.eflags),
15206 ENTRY(cpum.GstCtx.uRipInhibitInt),
15207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15208 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
15209 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
15210 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
15211 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
15212 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
15213 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
15214 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
15215 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
15216 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
15217 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
15218 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
15219 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
15220 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
15221 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
15222 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
15223 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
15224 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
15225 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
15226 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
15227 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
15228 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
15229 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
15230 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
15231 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
15232 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
15233 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
15234 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
15235 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
15236 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
15237 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
15238 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
15239 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
15240#endif
15241#undef ENTRY
15242 };
15243#ifdef VBOX_STRICT
15244 static bool s_fOrderChecked = false;
15245 if (!s_fOrderChecked)
15246 {
15247 s_fOrderChecked = true;
15248 uint32_t offPrev = s_aMembers[0].off;
15249 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
15250 {
15251 Assert(s_aMembers[i].off > offPrev);
15252 offPrev = s_aMembers[i].off;
15253 }
15254 }
15255#endif
15256
15257 /*
15258 * Binary lookup.
15259 */
15260 unsigned iStart = 0;
15261 unsigned iEnd = RT_ELEMENTS(s_aMembers);
15262 for (;;)
15263 {
15264 unsigned const iCur = iStart + (iEnd - iStart) / 2;
15265 uint32_t const offCur = s_aMembers[iCur].off;
15266 if (off < offCur)
15267 {
15268 if (iCur != iStart)
15269 iEnd = iCur;
15270 else
15271 break;
15272 }
15273 else if (off > offCur)
15274 {
15275 if (iCur + 1 < iEnd)
15276 iStart = iCur + 1;
15277 else
15278 break;
15279 }
15280 else
15281 return s_aMembers[iCur].pszName;
15282 }
15283#ifdef VBOX_WITH_STATISTICS
15284 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
15285 return "iem.s.acThreadedFuncStats[iFn]";
15286#endif
15287 return NULL;
15288}
15289
15290
15291/**
15292 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
15293 * @returns pszBuf.
15294 * @param fFlags The flags.
15295 * @param pszBuf The output buffer.
15296 * @param cbBuf The output buffer size. At least 32 bytes.
15297 */
15298DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
15299{
15300 Assert(cbBuf >= 32);
15301 static RTSTRTUPLE const s_aModes[] =
15302 {
15303 /* [00] = */ { RT_STR_TUPLE("16BIT") },
15304 /* [01] = */ { RT_STR_TUPLE("32BIT") },
15305 /* [02] = */ { RT_STR_TUPLE("!2!") },
15306 /* [03] = */ { RT_STR_TUPLE("!3!") },
15307 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
15308 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
15309 /* [06] = */ { RT_STR_TUPLE("!6!") },
15310 /* [07] = */ { RT_STR_TUPLE("!7!") },
15311 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
15312 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
15313 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
15314 /* [0b] = */ { RT_STR_TUPLE("!b!") },
15315 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
15316 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
15317 /* [0e] = */ { RT_STR_TUPLE("!e!") },
15318 /* [0f] = */ { RT_STR_TUPLE("!f!") },
15319 /* [10] = */ { RT_STR_TUPLE("!10!") },
15320 /* [11] = */ { RT_STR_TUPLE("!11!") },
15321 /* [12] = */ { RT_STR_TUPLE("!12!") },
15322 /* [13] = */ { RT_STR_TUPLE("!13!") },
15323 /* [14] = */ { RT_STR_TUPLE("!14!") },
15324 /* [15] = */ { RT_STR_TUPLE("!15!") },
15325 /* [16] = */ { RT_STR_TUPLE("!16!") },
15326 /* [17] = */ { RT_STR_TUPLE("!17!") },
15327 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
15328 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
15329 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
15330 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
15331 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
15332 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
15333 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
15334 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
15335 };
15336 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
15337 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
15338 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
15339
15340 pszBuf[off++] = ' ';
15341 pszBuf[off++] = 'C';
15342 pszBuf[off++] = 'P';
15343 pszBuf[off++] = 'L';
15344 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
15345 Assert(off < 32);
15346
15347 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
15348
15349 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
15350 {
15351 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
15352 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
15353 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
15354 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
15355 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
15356 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
15357 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
15358 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
15359 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
15360 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
15361 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
15362 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
15363 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
15364 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
15365 };
15366 if (fFlags)
15367 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
15368 if (s_aFlags[i].fFlag & fFlags)
15369 {
15370 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
15371 pszBuf[off++] = ' ';
15372 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
15373 off += s_aFlags[i].cchName;
15374 fFlags &= ~s_aFlags[i].fFlag;
15375 if (!fFlags)
15376 break;
15377 }
15378 pszBuf[off] = '\0';
15379
15380 return pszBuf;
15381}
15382
15383
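/**
 * Disassembles a native translation block to the given output helper,
 * interleaving guest instructions, threaded calls, labels and register
 * shadowing notes when TB debug info is available.
 */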
15384DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
15385{
15386 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
15387#if defined(RT_ARCH_AMD64)
15388 static const char * const a_apszMarkers[] =
15389 {
15390 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
15391 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
15392 };
15393#endif
15394
15395 char szDisBuf[512];
15396 DISSTATE Dis;
15397 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
15398 uint32_t const cNative = pTb->Native.cInstructions;
15399 uint32_t offNative = 0;
15400#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15401 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
15402#endif
15403 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15404 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15405 : DISCPUMODE_64BIT;
15406#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15407 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
15408#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15409 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
15410#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15411# error "Port me"
15412#else
15413 csh hDisasm = ~(size_t)0;
15414# if defined(RT_ARCH_AMD64)
15415 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
15416# elif defined(RT_ARCH_ARM64)
15417 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
15418# else
15419# error "Port me"
15420# endif
15421 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
15422
15423 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
15424 //Assert(rcCs == CS_ERR_OK);
15425#endif
15426
15427 /*
15428 * Print TB info.
15429 */
15430 pHlp->pfnPrintf(pHlp,
15431 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
15432 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
15433 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
15434 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
15435#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15436 if (pDbgInfo && pDbgInfo->cEntries > 1)
15437 {
15438 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
15439
15440 /*
15441 * This disassembly is driven by the debug info which follows the native
             * code and indicates where the next guest instruction starts, where
             * labels are, and other such things.
15444 */
15445 uint32_t idxThreadedCall = 0;
15446 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
15447 uint8_t idxRange = UINT8_MAX;
15448 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
15449 uint32_t offRange = 0;
15450 uint32_t offOpcodes = 0;
15451 uint32_t const cbOpcodes = pTb->cbOpcodes;
15452 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
15453 uint32_t const cDbgEntries = pDbgInfo->cEntries;
15454 uint32_t iDbgEntry = 1;
15455 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
15456
15457 while (offNative < cNative)
15458 {
15459 /* If we're at or have passed the point where the next chunk of debug
15460 info starts, process it. */
15461 if (offDbgNativeNext <= offNative)
15462 {
15463 offDbgNativeNext = UINT32_MAX;
15464 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
15465 {
15466 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
15467 {
15468 case kIemTbDbgEntryType_GuestInstruction:
15469 {
15470 /* Did the exec flag change? */
15471 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
15472 {
15473 pHlp->pfnPrintf(pHlp,
15474 " fExec change %#08x -> %#08x %s\n",
15475 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15476 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15477 szDisBuf, sizeof(szDisBuf)));
15478 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
15479 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15480 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15481 : DISCPUMODE_64BIT;
15482 }
15483
                        /* New opcode range? We need to fend off a spurious debug info entry here for cases
15485 where the compilation was aborted before the opcode was recorded and the actual
15486 instruction was translated to a threaded call. This may happen when we run out
15487 of ranges, or when some complicated interrupts/FFs are found to be pending or
15488 similar. So, we just deal with it here rather than in the compiler code as it
15489 is a lot simpler to do here. */
15490 if ( idxRange == UINT8_MAX
15491 || idxRange >= cRanges
15492 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
15493 {
15494 idxRange += 1;
15495 if (idxRange < cRanges)
15496 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
15497 else
15498 continue;
15499 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
15500 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
15501 + (pTb->aRanges[idxRange].idxPhysPage == 0
15502 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15503 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
15504 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15505 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
15506 pTb->aRanges[idxRange].idxPhysPage);
15507 GCPhysPc += offRange;
15508 }
15509
15510 /* Disassemble the instruction. */
15511 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
15512 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
15513 uint32_t cbInstr = 1;
15514 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15515 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
15516 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15517 if (RT_SUCCESS(rc))
15518 {
15519 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15520 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15521 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15522 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15523
15524 static unsigned const s_offMarker = 55;
15525 static char const s_szMarker[] = " ; <--- guest";
15526 if (cch < s_offMarker)
15527 {
15528 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
15529 cch = s_offMarker;
15530 }
15531 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
15532 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
15533
15534 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
15535 }
15536 else
15537 {
15538 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
15539 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
15540 cbInstr = 1;
15541 }
15542 GCPhysPc += cbInstr;
15543 offOpcodes += cbInstr;
15544 offRange += cbInstr;
15545 continue;
15546 }
15547
15548 case kIemTbDbgEntryType_ThreadedCall:
15549 pHlp->pfnPrintf(pHlp,
15550 " Call #%u to %s (%u args) - %s\n",
15551 idxThreadedCall,
15552 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15553 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15554 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
15555 idxThreadedCall++;
15556 continue;
15557
15558 case kIemTbDbgEntryType_GuestRegShadowing:
15559 {
15560 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15561 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
15562 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
15563 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
15564 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15565 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
15566 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
15567 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
15568 else
15569 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
15570 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
15571 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15572 continue;
15573 }
15574
15575#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15576 case kIemTbDbgEntryType_GuestSimdRegShadowing:
15577 {
15578 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15579 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
15580 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
15581 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
15582 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15583 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
15584 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
15585 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
15586 else
15587 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
15588 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
15589 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15590 continue;
15591 }
15592#endif
15593
15594 case kIemTbDbgEntryType_Label:
15595 {
15596 const char *pszName = "what_the_fudge";
15597 const char *pszComment = "";
15598 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
15599 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
15600 {
15601 case kIemNativeLabelType_Return:
15602 pszName = "Return";
15603 break;
15604 case kIemNativeLabelType_ReturnBreak:
15605 pszName = "ReturnBreak";
15606 break;
15607 case kIemNativeLabelType_ReturnWithFlags:
15608 pszName = "ReturnWithFlags";
15609 break;
15610 case kIemNativeLabelType_NonZeroRetOrPassUp:
15611 pszName = "NonZeroRetOrPassUp";
15612 break;
15613 case kIemNativeLabelType_RaiseGp0:
15614 pszName = "RaiseGp0";
15615 break;
15616 case kIemNativeLabelType_RaiseNm:
15617 pszName = "RaiseNm";
15618 break;
15619 case kIemNativeLabelType_RaiseUd:
15620 pszName = "RaiseUd";
15621 break;
15622 case kIemNativeLabelType_RaiseMf:
15623 pszName = "RaiseMf";
15624 break;
15625 case kIemNativeLabelType_RaiseXf:
15626 pszName = "RaiseXf";
15627 break;
15628 case kIemNativeLabelType_ObsoleteTb:
15629 pszName = "ObsoleteTb";
15630 break;
15631 case kIemNativeLabelType_NeedCsLimChecking:
15632 pszName = "NeedCsLimChecking";
15633 break;
15634 case kIemNativeLabelType_CheckBranchMiss:
15635 pszName = "CheckBranchMiss";
15636 break;
15637 case kIemNativeLabelType_If:
15638 pszName = "If";
15639 fNumbered = true;
15640 break;
15641 case kIemNativeLabelType_Else:
15642 pszName = "Else";
15643 fNumbered = true;
15644 pszComment = " ; regs state restored pre-if-block";
15645 break;
15646 case kIemNativeLabelType_Endif:
15647 pszName = "Endif";
15648 fNumbered = true;
15649 break;
15650 case kIemNativeLabelType_CheckIrq:
15651 pszName = "CheckIrq_CheckVM";
15652 fNumbered = true;
15653 break;
15654 case kIemNativeLabelType_TlbLookup:
15655 pszName = "TlbLookup";
15656 fNumbered = true;
15657 break;
15658 case kIemNativeLabelType_TlbMiss:
15659 pszName = "TlbMiss";
15660 fNumbered = true;
15661 break;
15662 case kIemNativeLabelType_TlbDone:
15663 pszName = "TlbDone";
15664 fNumbered = true;
15665 break;
15666 case kIemNativeLabelType_Invalid:
15667 case kIemNativeLabelType_End:
15668 break;
15669 }
15670 if (fNumbered)
15671 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
15672 else
15673 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
15674 continue;
15675 }
15676
15677 case kIemTbDbgEntryType_NativeOffset:
15678 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
15679 Assert(offDbgNativeNext > offNative);
15680 break;
15681
15682#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
15683 case kIemTbDbgEntryType_DelayedPcUpdate:
15684 pHlp->pfnPrintf(pHlp,
15685 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
15686 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
15687 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
15688 continue;
15689#endif
15690
15691 default:
15692 AssertFailed();
15693 }
15694 iDbgEntry++;
15695 break;
15696 }
15697 }
15698
15699 /*
15700 * Disassemble the next native instruction.
15701 */
15702 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15703# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15704 uint32_t cbInstr = sizeof(paNative[0]);
15705 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
15706 if (RT_SUCCESS(rc))
15707 {
15708# if defined(RT_ARCH_AMD64)
15709 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
15710 {
15711 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
15712 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
15713 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
15714 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
15715 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
15716 uInfo & 0x8000 ? "recompiled" : "todo");
15717 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
15718 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
15719 else
15720 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
15721 }
15722 else
15723# endif
15724 {
15725 const char *pszAnnotation = NULL;
15726# ifdef RT_ARCH_AMD64
15727 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15728 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15729 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15730 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15731 PCDISOPPARAM pMemOp;
15732 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
15733 pMemOp = &Dis.Param1;
15734 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
15735 pMemOp = &Dis.Param2;
15736 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
15737 pMemOp = &Dis.Param3;
15738 else
15739 pMemOp = NULL;
15740 if ( pMemOp
15741 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
15742 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
15743 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
15744 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
15745
15746#elif defined(RT_ARCH_ARM64)
15747 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
15748 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15749 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15750# else
15751# error "Port me"
15752# endif
15753 if (pszAnnotation)
15754 {
15755 static unsigned const s_offAnnotation = 55;
15756 size_t const cchAnnotation = strlen(pszAnnotation);
15757 size_t cchDis = strlen(szDisBuf);
15758 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
15759 {
15760 if (cchDis < s_offAnnotation)
15761 {
15762 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
15763 cchDis = s_offAnnotation;
15764 }
15765 szDisBuf[cchDis++] = ' ';
15766 szDisBuf[cchDis++] = ';';
15767 szDisBuf[cchDis++] = ' ';
15768 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
15769 }
15770 }
15771 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
15772 }
15773 }
15774 else
15775 {
15776# if defined(RT_ARCH_AMD64)
15777 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
15778 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
15779# elif defined(RT_ARCH_ARM64)
15780 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
15781# else
15782# error "Port me"
15783# endif
15784 cbInstr = sizeof(paNative[0]);
15785 }
15786 offNative += cbInstr / sizeof(paNative[0]);
15787
15788# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15789 cs_insn *pInstr;
15790 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
15791 (uintptr_t)pNativeCur, 1, &pInstr);
15792 if (cInstrs > 0)
15793 {
15794 Assert(cInstrs == 1);
15795 const char *pszAnnotation = NULL;
15796# if defined(RT_ARCH_ARM64)
15797 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
15798 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
15799 {
15800 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
15801 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
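                    /* Shapes the ad hoc parser below copes with (the operand text format is an
                       assumption about capstone's output; the offsets are made-up examples):
                           "w9, [x28]"          -> VMCPU + 0
                           "w9, [x28, #0x123]"  -> VMCPU + 0x123
                           "x9, [x27, #0x10]"   -> VMCPU.cpum.GstCtx + 0x10
                       Index registers and shifted offsets are not handled (see the todo below). */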
15802 char *psz = strchr(pInstr->op_str, '[');
15803 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
15804 {
15805 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
15806 int32_t off = -1;
15807 psz += 4;
15808 if (*psz == ']')
15809 off = 0;
15810 else if (*psz == ',')
15811 {
15812 psz = RTStrStripL(psz + 1);
15813 if (*psz == '#')
15814 off = RTStrToInt32(&psz[1]);
15815 /** @todo deal with index registers and LSL as well... */
15816 }
15817 if (off >= 0)
15818 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
15819 }
15820 }
15821# endif
15822
15823 size_t const cchOp = strlen(pInstr->op_str);
15824# if defined(RT_ARCH_AMD64)
15825 if (pszAnnotation)
15826 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
15827 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
15828 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15829 else
15830 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
15831 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
15832
15833# else
15834 if (pszAnnotation)
15835 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
15836 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
15837 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15838 else
15839 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
15840 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
15841# endif
15842 offNative += pInstr->size / sizeof(*pNativeCur);
15843 cs_free(pInstr, cInstrs);
15844 }
15845 else
15846 {
15847# if defined(RT_ARCH_AMD64)
15848 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
15849 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
15850# else
15851 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
15852# endif
15853 offNative++;
15854 }
15855# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15856 }
15857 }
15858 else
15859#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
15860 {
15861 /*
15862 * No debug info, just disassemble the x86 code and then the native code.
15863 *
15864 * First the guest code:
15865 */
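        /* Address note for the loop below: a range with idxPhysPage == 0 lies on the TB's own first
           physical page (pTb->GCPhysPc rounded down to the page start), while other ranges take their
           page from pTb->aGCPhysPages[idxPhysPage - 1] - as read from the expression that follows. */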
15866 for (unsigned i = 0; i < pTb->cRanges; i++)
15867 {
15868 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
15869 + (pTb->aRanges[i].idxPhysPage == 0
15870 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15871 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
15872 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15873 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
15874 unsigned off = pTb->aRanges[i].offOpcodes;
15875 /** @todo this ain't working when crossing pages! */
15876 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
15877 while (off < cbOpcodes)
15878 {
15879 uint32_t cbInstr = 1;
15880 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15881 &pTb->pabOpcodes[off], cbOpcodes - off,
15882 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15883 if (RT_SUCCESS(rc))
15884 {
15885 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15886 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15887 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15888 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15889 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
15890 GCPhysPc += cbInstr;
15891 off += cbInstr;
15892 }
15893 else
15894 {
15895 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
15896 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
15897 break;
15898 }
15899 }
15900 }
15901
15902 /*
15903 * Then the native code:
15904 */
15905 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
15906 while (offNative < cNative)
15907 {
15908 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15909# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15910 uint32_t cbInstr = sizeof(paNative[0]);
15911 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
15912 if (RT_SUCCESS(rc))
15913 {
15914# if defined(RT_ARCH_AMD64)
15915 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
15916 {
15917 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
15918 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
15919 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
15920 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
15921 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
15922 uInfo & 0x8000 ? "recompiled" : "todo");
15923 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
15924 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
15925 else
15926 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
15927 }
15928 else
15929# endif
15930 {
15931# ifdef RT_ARCH_AMD64
15932 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15933 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15934 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15935 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15936# elif defined(RT_ARCH_ARM64)
15937 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
15938 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15939 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15940# else
15941# error "Port me"
15942# endif
15943 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
15944 }
15945 }
15946 else
15947 {
15948# if defined(RT_ARCH_AMD64)
15949 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
15950 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
15951# else
15952 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
15953# endif
15954 cbInstr = sizeof(paNative[0]);
15955 }
15956 offNative += cbInstr / sizeof(paNative[0]);
15957
15958# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15959 cs_insn *pInstr;
15960 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
15961 (uintptr_t)pNativeCur, 1, &pInstr);
15962 if (cInstrs > 0)
15963 {
15964 Assert(cInstrs == 1);
15965# if defined(RT_ARCH_AMD64)
15966 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
15967 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
15968# else
15969 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
15970 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
15971# endif
15972 offNative += pInstr->size / sizeof(*pNativeCur);
15973 cs_free(pInstr, cInstrs);
15974 }
15975 else
15976 {
15977# if defined(RT_ARCH_AMD64)
15978 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
15979 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
15980# else
15981 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
15982# endif
15983 offNative++;
15984 }
15985# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15986 }
15987 }
15988
15989#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15990 /* Cleanup. */
15991 cs_close(&hDisasm);
15992#endif
15993}
15994
15995
15996/**
15997 * Recompiles the given threaded TB into a native one.
15998 *
15999 * In case of failure the translation block will be returned as-is.
16000 *
16001 * @returns pTb.
16002 * @param pVCpu The cross context virtual CPU structure of the calling
16003 * thread.
16004 * @param       pTb     The threaded translation block to recompile to native.
16005 */
16006DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
16007{
16008 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
16009
16010 /*
16011 * The first time thru, we allocate the recompiler state; the other times
16012 * we just need to reset it before using it again.
16013 */
16014 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
16015 if (RT_LIKELY(pReNative))
16016 iemNativeReInit(pReNative, pTb);
16017 else
16018 {
16019 pReNative = iemNativeInit(pVCpu, pTb);
16020 AssertReturn(pReNative, pTb);
16021 }
16022
16023#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16024 /*
16025 * First do liveness analysis. This is done backwards.
16026 */
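    /* How the backward pass below is read here (an interpretation of the indexing, not taken from
       other documentation): entry i summarises how calls i+1..end use each guest register, so while
       recompiling call i the register allocator can treat anything no later call reads before writing
       as dead.  The last entry is seeded as 'unused' and each pfnLiveness step derives entry i-1 from
       call i together with entry i. */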
16027 {
16028 uint32_t idxCall = pTb->Thrd.cCalls;
16029 if (idxCall <= pReNative->cLivenessEntriesAlloc)
16030 { /* likely */ }
16031 else
16032 {
16033 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
16034 while (idxCall > cAlloc)
16035 cAlloc *= 2;
16036 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
16037 AssertReturn(pvNew, pTb);
16038 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
16039 pReNative->cLivenessEntriesAlloc = cAlloc;
16040 }
16041 AssertReturn(idxCall > 0, pTb);
16042 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
16043
16044 /* The initial (final) entry. */
16045 idxCall--;
16046 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
16047
16048 /* Loop backwards thru the calls and fill in the other entries. */
16049 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
16050 while (idxCall > 0)
16051 {
16052 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
16053 if (pfnLiveness)
16054 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
16055 else
16056 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
16057 pCallEntry--;
16058 idxCall--;
16059 }
16060
16061# ifdef VBOX_WITH_STATISTICS
16062 /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
16063    to 'clobbered' rather than 'input'. */
16064 /** @todo */
16065# endif
16066 }
16067#endif
16068
16069 /*
16070 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
16071 * for aborting if an error happens.
16072 */
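    /* Any of the iemNativeEmit* helpers used below may bail out via longjmp/throw on allocation or
       consistency failures, which is why the whole emission loop sits inside IEMNATIVE_TRY_SETJMP;
       the catch block further down then just logs the status and returns the untouched threaded TB
       (matching the behaviour documented for this function). */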
16073 uint32_t cCallsLeft = pTb->Thrd.cCalls;
16074#ifdef LOG_ENABLED
16075 uint32_t const cCallsOrg = cCallsLeft;
16076#endif
16077 uint32_t off = 0;
16078 int rc = VINF_SUCCESS;
16079 IEMNATIVE_TRY_SETJMP(pReNative, rc)
16080 {
16081 /*
16082 * Emit prolog code (fixed).
16083 */
16084 off = iemNativeEmitProlog(pReNative, off);
16085
16086 /*
16087 * Convert the calls to native code.
16088 */
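    /* Each threaded call entry is looked up in g_apfnIemNativeRecompileFunctions: if a dedicated
       native recompiler exists it emits inline code, otherwise iemNativeEmitThreadedCall emits a
       plain call to the threaded helper - the 'recompiled' vs 'todo' split seen in the markers and
       statistics (a summary of the loop below). */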
16089#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16090 int32_t iGstInstr = -1;
16091#endif
16092#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
16093 uint32_t cThreadedCalls = 0;
16094 uint32_t cRecompiledCalls = 0;
16095#endif
16096#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16097 uint32_t idxCurCall = 0;
16098#endif
16099 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
16100 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
16101 while (cCallsLeft-- > 0)
16102 {
16103 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
16104#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16105 pReNative->idxCurCall = idxCurCall;
16106#endif
16107
16108 /*
16109 * Debug info, assembly markup and statistics.
16110 */
16111#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
16112 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
16113 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
16114#endif
16115#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16116 iemNativeDbgInfoAddNativeOffset(pReNative, off);
16117 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
16118 {
16119 if (iGstInstr < (int32_t)pTb->cInstructions)
16120 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
16121 else
16122 Assert(iGstInstr == pTb->cInstructions);
16123 iGstInstr = pCallEntry->idxInstr;
16124 }
16125 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
16126#endif
16127#if defined(VBOX_STRICT)
16128 off = iemNativeEmitMarker(pReNative, off,
16129 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
16130#endif
16131#if defined(VBOX_STRICT)
16132 iemNativeRegAssertSanity(pReNative);
16133#endif
16134#ifdef VBOX_WITH_STATISTICS
16135 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
16136#endif
16137
16138 /*
16139 * Actual work.
16140 */
16141 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
16142 pfnRecom ? "(recompiled)" : "(todo)"));
16143 if (pfnRecom) /** @todo stats on this. */
16144 {
16145 off = pfnRecom(pReNative, off, pCallEntry);
16146 STAM_REL_STATS({cRecompiledCalls++;});
16147 }
16148 else
16149 {
16150 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
16151 STAM_REL_STATS({cThreadedCalls++;});
16152 }
16153 Assert(off <= pReNative->cInstrBufAlloc);
16154 Assert(pReNative->cCondDepth == 0);
16155
16156#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
16157 if (LogIs2Enabled())
16158 {
16159 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
16160# ifndef IEMLIVENESS_EXTENDED_LAYOUT
16161 static const char s_achState[] = "CUXI";
16162# else
16163 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
16164# endif
16165
16166 char szGpr[17];
16167 for (unsigned i = 0; i < 16; i++)
16168 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
16169 szGpr[16] = '\0';
16170
16171 char szSegBase[X86_SREG_COUNT + 1];
16172 char szSegLimit[X86_SREG_COUNT + 1];
16173 char szSegAttrib[X86_SREG_COUNT + 1];
16174 char szSegSel[X86_SREG_COUNT + 1];
16175 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
16176 {
16177 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
16178 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
16179 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
16180 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
16181 }
16182 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
16183 = szSegSel[X86_SREG_COUNT] = '\0';
16184
16185 char szEFlags[8];
16186 for (unsigned i = 0; i < 7; i++)
16187 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
16188 szEFlags[7] = '\0';
16189
16190 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
16191 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
16192 }
16193#endif
16194
16195 /*
16196 * Advance.
16197 */
16198 pCallEntry++;
16199#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16200 idxCurCall++;
16201#endif
16202 }
16203
16204 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
16205 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
16206 if (!cThreadedCalls)
16207 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
16208
16209 /*
16210 * Emit the epilog code.
16211 */
16212 uint32_t idxReturnLabel;
16213 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
16214
16215 /*
16216 * Generate special jump labels.
16217 */
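        /* pReNative->bmLabelTypes is a bitmap of the label types that were actually requested while
           recompiling, so only the shared tail sequences the generated code really jumps to get
           emitted here; unused raise/return paths add nothing to the TB (inferred from the checks
           below). */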
16218 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
16219 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
16220 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
16221 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
16222 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
16223 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
16224 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
16225 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
16226 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
16227 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
16228 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
16229 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
16230 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
16231 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
16232 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
16233 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
16234 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
16235 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
16236 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
16237 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
16238 }
16239 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
16240 {
16241 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
16242 return pTb;
16243 }
16244 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
16245 Assert(off <= pReNative->cInstrBufAlloc);
16246
16247 /*
16248 * Make sure all labels have been defined.
16249 */
16250 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
16251#ifdef VBOX_STRICT
16252 uint32_t const cLabels = pReNative->cLabels;
16253 for (uint32_t i = 0; i < cLabels; i++)
16254 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
16255#endif
16256
16257 /*
16258 * Allocate executable memory, copy over the code we've generated.
16259 */
16260 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
16261 if (pTbAllocator->pDelayedFreeHead)
16262 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
16263
16264 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
16265 AssertReturn(paFinalInstrBuf, pTb);
16266 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
16267
16268 /*
16269 * Apply fixups.
16270 */
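    /* Key to the fixup kinds below (offsets are in IEMNATIVEINSTR units; which ARM64 instructions use
       which field is inferred from the bit positions, not cross-checked against the emitters):
         - Rel32:        32-bit PC-relative displacement on x86/AMD64.
         - RelImm26At0:  ARM64 imm26 in bits [25:0], the B/BL branch immediate.
         - RelImm19At5:  ARM64 imm19 in bits [23:5], as used by B.cond and CBZ/CBNZ.
         - RelImm14At5:  ARM64 imm14 in bits [18:5], as used by TBZ/TBNZ. */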
16271 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
16272 uint32_t const cFixups = pReNative->cFixups;
16273 for (uint32_t i = 0; i < cFixups; i++)
16274 {
16275 Assert(paFixups[i].off < off);
16276 Assert(paFixups[i].idxLabel < cLabels);
16277 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
16278 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
16279 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
16280 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
16281 switch (paFixups[i].enmType)
16282 {
16283#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
16284 case kIemNativeFixupType_Rel32:
16285 Assert(paFixups[i].off + 4 <= off);
16286 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16287 continue;
16288
16289#elif defined(RT_ARCH_ARM64)
16290 case kIemNativeFixupType_RelImm26At0:
16291 {
16292 Assert(paFixups[i].off < off);
16293 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16294 Assert(offDisp >= -262144 && offDisp < 262144);
16295 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
16296 continue;
16297 }
16298
16299 case kIemNativeFixupType_RelImm19At5:
16300 {
16301 Assert(paFixups[i].off < off);
16302 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16303 Assert(offDisp >= -262144 && offDisp < 262144);
16304 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
16305 continue;
16306 }
16307
16308 case kIemNativeFixupType_RelImm14At5:
16309 {
16310 Assert(paFixups[i].off < off);
16311 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16312 Assert(offDisp >= -8192 && offDisp < 8192);
16313 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
16314 continue;
16315 }
16316
16317#endif
16318 case kIemNativeFixupType_Invalid:
16319 case kIemNativeFixupType_End:
16320 break;
16321 }
16322 AssertFailed();
16323 }
16324
16325 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
16326 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
16327
16328 /*
16329 * Convert the translation block.
16330 */
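    /* The TB object is converted in place: the threaded call table is freed, the native instruction
       buffer and count are hooked up, and the type flag is flipped to native, so callers holding pTb
       keep a valid pointer (observation on the statements below). */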
16331 RTMemFree(pTb->Thrd.paCalls);
16332 pTb->Native.paInstructions = paFinalInstrBuf;
16333 pTb->Native.cInstructions = off;
16334 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
16335#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16336 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
16337 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
16338#endif
16339
16340 Assert(pTbAllocator->cThreadedTbs > 0);
16341 pTbAllocator->cThreadedTbs -= 1;
16342 pTbAllocator->cNativeTbs += 1;
16343 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
16344
16345#ifdef LOG_ENABLED
16346 /*
16347 * Disassemble to the log if enabled.
16348 */
16349 if (LogIs3Enabled())
16350 {
16351 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
16352 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
16353# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
16354 RTLogFlush(NULL);
16355# endif
16356 }
16357#endif
16358 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
16359
16360 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
16361 return pTb;
16362}
16363