VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103773

Last change on this file since 103773 was 103769, checked in by vboxsync, 13 months ago

VMM/IEM: Windows build fixes for SIMD register allocator. bugref:10614

1/* $Id: IEMAllN8veRecompiler.cpp 103769 2024-03-11 14:56:28Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
136static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
137 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
138# endif
139static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
140#endif
141#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
142static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
143static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
144#endif
145DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
146DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
147 IEMNATIVEGSTREG enmGstReg, uint32_t off);
148DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
149
150
151/*********************************************************************************************************************************
152* Executable Memory Allocator *
153*********************************************************************************************************************************/
154/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155 * Use an alternative chunk sub-allocator that does not store internal data
156 * in the chunk.
157 *
158 * Using the RTHeapSimple is not practical on newer darwin systems where
159 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
160 * memory. We would have to change the protection of the whole chunk for
161 * every call to RTHeapSimple, which would be rather expensive.
162 *
163 * This alternative implementation lets us restrict page protection modifications
164 * to the pages backing the executable memory we just allocated.
165 */
166#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
167/** The chunk sub-allocation unit size in bytes. */
168#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
169/** The chunk sub-allocation unit size as a shift factor. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
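/* Note: the two values above must stay in sync (128 == 1 << 7).  Requests are
   rounded up to whole units, so e.g. a 300 byte request consumes 3 units
   (384 bytes) and the smallest possible allocation is one 128 byte unit. */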
171
172#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
173# ifdef IEMNATIVE_USE_GDB_JIT
174# define IEMNATIVE_USE_GDB_JIT_ET_DYN
175
176/** GDB JIT: Code entry. */
177typedef struct GDBJITCODEENTRY
178{
179 struct GDBJITCODEENTRY *pNext;
180 struct GDBJITCODEENTRY *pPrev;
181 uint8_t *pbSymFile;
182 uint64_t cbSymFile;
183} GDBJITCODEENTRY;
184
185/** GDB JIT: Actions. */
186typedef enum GDBJITACTIONS : uint32_t
187{
188 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
189} GDBJITACTIONS;
190
191/** GDB JIT: Descriptor. */
192typedef struct GDBJITDESCRIPTOR
193{
194 uint32_t uVersion;
195 GDBJITACTIONS enmAction;
196 GDBJITCODEENTRY *pRelevant;
197 GDBJITCODEENTRY *pHead;
198 /** Our addition: */
199 GDBJITCODEENTRY *pTail;
200} GDBJITDESCRIPTOR;
201
202/** GDB JIT: Our simple symbol file data. */
203typedef struct GDBJITSYMFILE
204{
205 Elf64_Ehdr EHdr;
206# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Shdr aShdrs[5];
208# else
209 Elf64_Shdr aShdrs[7];
210 Elf64_Phdr aPhdrs[2];
211# endif
212 /** The dwarf ehframe data for the chunk. */
213 uint8_t abEhFrame[512];
214 char szzStrTab[128];
215 Elf64_Sym aSymbols[3];
216# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
217 Elf64_Sym aDynSyms[2];
218 Elf64_Dyn aDyn[6];
219# endif
220} GDBJITSYMFILE;
221
222extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
223extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
224
225/** Init once for g_IemNativeGdbJitLock. */
226static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
227/** Init once for the critical section. */
228static RTCRITSECT g_IemNativeGdbJitLock;
229
230/** GDB reads the info here. */
231GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
232
233/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
234DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
235{
236 ASMNopPause();
237}
238
239/** @callback_method_impl{FNRTONCE} */
240static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
241{
242 RT_NOREF(pvUser);
243 return RTCritSectInit(&g_IemNativeGdbJitLock);
244}
245
246
247# endif /* IEMNATIVE_USE_GDB_JIT */
248
249/**
250 * Per-chunk unwind info for non-windows hosts.
251 */
252typedef struct IEMEXECMEMCHUNKEHFRAME
253{
254# ifdef IEMNATIVE_USE_LIBUNWIND
255 /** The offset of the FDA into abEhFrame. */
256 uintptr_t offFda;
257# else
258 /** 'struct object' storage area. */
259 uint8_t abObject[1024];
260# endif
261# ifdef IEMNATIVE_USE_GDB_JIT
262# if 0
263 /** The GDB JIT 'symbol file' data. */
264 GDBJITSYMFILE GdbJitSymFile;
265# endif
266 /** The GDB JIT list entry. */
267 GDBJITCODEENTRY GdbJitEntry;
268# endif
269 /** The dwarf ehframe data for the chunk. */
270 uint8_t abEhFrame[512];
271} IEMEXECMEMCHUNKEHFRAME;
273/** Pointer to per-chunk unwind info for non-windows hosts. */
273typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
274#endif
275
276
277/**
278 * A chunk of executable memory.
279 */
280typedef struct IEMEXECMEMCHUNK
281{
282#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
283 /** Number of free items in this chunk. */
284 uint32_t cFreeUnits;
285 /** Hint where to start searching for free space in the allocation bitmap. */
286 uint32_t idxFreeHint;
287#else
288 /** The heap handle. */
289 RTHEAPSIMPLE hHeap;
290#endif
291 /** Pointer to the chunk. */
292 void *pvChunk;
293#ifdef IN_RING3
294 /**
295 * Pointer to the unwind information.
296 *
297 * This is used during C++ throw and longjmp (windows and probably most other
298 * platforms). Some debuggers (windbg) make use of it as well.
299 *
300 * Windows: This is allocated from hHeap on windows because (at least for
301 * AMD64) the UNWIND_INFO structure address in the
302 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
303 *
304 * Others: Allocated from the regular heap to avoid unnecessary executable data
305 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
306 void *pvUnwindInfo;
307#elif defined(IN_RING0)
308 /** Allocation handle. */
309 RTR0MEMOBJ hMemObj;
310#endif
311} IEMEXECMEMCHUNK;
312/** Pointer to a memory chunk. */
313typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
314
315
316/**
317 * Executable memory allocator for the native recompiler.
318 */
319typedef struct IEMEXECMEMALLOCATOR
320{
321 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
322 uint32_t uMagic;
323
324 /** The chunk size. */
325 uint32_t cbChunk;
326 /** The maximum number of chunks. */
327 uint32_t cMaxChunks;
328 /** The current number of chunks. */
329 uint32_t cChunks;
330 /** Hint where to start looking for available memory. */
331 uint32_t idxChunkHint;
332 /** Statistics: Current number of allocations. */
333 uint32_t cAllocations;
334
335 /** The total amount of memory available. */
336 uint64_t cbTotal;
337 /** Total amount of free memory. */
338 uint64_t cbFree;
339 /** Total amount of memory allocated. */
340 uint64_t cbAllocated;
341
342#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
343 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
344 *
345 * Since the chunk size is a power of two and the minimum chunk size is a lot
346 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
347 * require a whole number of uint64_t elements in the allocation bitmap. So,
348 * for the sake of simplicity/laziness, they are allocated as one
349 * continuous block. */
350 uint64_t *pbmAlloc;
351 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
352 uint32_t cUnitsPerChunk;
353 /** Number of bitmap elements per chunk (for quickly locating the bitmap
354 * portion corresponding to an chunk). */
355 uint32_t cBitmapElementsPerChunk;
356#else
357 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
358 * @{ */
359 /** The size of the heap internal block header. This is used to adjust the
360 * requested memory size to make sure there is exactly enough room for a header at
361 * the end of the blocks we allocate before the next 64 byte alignment line. */
362 uint32_t cbHeapBlockHdr;
363 /** The size of the initial heap allocation required to make sure the first
364 * allocation is correctly aligned. */
365 uint32_t cbHeapAlignTweak;
366 /** The alignment tweak allocation address. */
367 void *pvAlignTweak;
368 /** @} */
369#endif
370
371#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
372 /** Pointer to the array of unwind info running parallel to aChunks (same
373 * allocation as this structure, located after the bitmaps).
374 * (For Windows, the structures must reside in 32-bit RVA distance to the
375 * actual chunk, so they are allocated off the chunk.) */
376 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
377#endif
378
379 /** The allocation chunks. */
380 RT_FLEXIBLE_ARRAY_EXTENSION
381 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
382} IEMEXECMEMALLOCATOR;
383/** Pointer to an executable memory allocator. */
384typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
385
386/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
387#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
388
389
390static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
391
392
393/**
394 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
395 * the heap statistics.
396 */
397static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
398 uint32_t cbReq, uint32_t idxChunk)
399{
400 pExecMemAllocator->cAllocations += 1;
401 pExecMemAllocator->cbAllocated += cbReq;
402#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
403 pExecMemAllocator->cbFree -= cbReq;
404#else
405 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
406#endif
407 pExecMemAllocator->idxChunkHint = idxChunk;
408
409#ifdef RT_OS_DARWIN
410 /*
411 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
412 * on darwin. So, we mark the pages returned as read+write after alloc and
413 * expect the caller to call iemExecMemAllocatorReadyForUse when done
414 * writing to the allocation.
415 *
416 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
417 * for details.
418 */
419 /** @todo detect if this is necessary... it wasn't required on 10.15 or
420 * whatever older version it was. */
421 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
422 AssertRC(rc);
423#endif
424
425 return pvRet;
426}
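/* Typical usage pattern implied by the above (darwin): allocate, write the
   recompiled code into the RW mapping, then call iemExecMemAllocatorReadyForUse()
   to flip the pages back to read+exec before running the code. */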
427
428
429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
430static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
431 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
432{
433 /*
434 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
435 */
436 Assert(!(cToScan & 63));
437 Assert(!(idxFirst & 63));
438 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
439 pbmAlloc += idxFirst / 64;
440
441 /*
442 * Scan the bitmap for cReqUnits consecutive clear bits
443 */
444 /** @todo This can probably be done more efficiently for non-x86 systems. */
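 /* Illustration: with cReqUnits=2 and the bitmap starting 1,1,0,1,0,0,...,
    ASMBitFirstClear returns bit 2, the inner loop stops at the set bit 3,
    and ASMBitNextClear resumes the scan at bit 4 where two clear bits
    finally satisfy the request. */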
445 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
446 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
447 {
448 uint32_t idxAddBit = 1;
449 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
450 idxAddBit++;
451 if (idxAddBit >= cReqUnits)
452 {
453 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
454
455 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
456 pChunk->cFreeUnits -= cReqUnits;
457 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
458
459 void * const pvRet = (uint8_t *)pChunk->pvChunk
460 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
461
462 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
463 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
464 }
465
466 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
467 }
468 return NULL;
469}
470#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
471
472
473static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
474{
475#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
476 /*
477 * Figure out how much to allocate.
478 */
479 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
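 /* e.g. cbReq=1000 -> (1000 + 127) >> 7 = 8 units, i.e. 1024 bytes. */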
480 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
481 {
482 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
483 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
484 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
485 {
486 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
487 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
488 if (pvRet)
489 return pvRet;
490 }
491 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
492 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
493 cReqUnits, idxChunk);
494 }
495#else
496 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
497 if (pvRet)
498 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
499#endif
500 return NULL;
501
502}
503
504
505/**
506 * Allocates @a cbReq bytes of executable memory.
507 *
508 * @returns Pointer to the memory, NULL if out of memory or other problem
509 * encountered.
510 * @param pVCpu The cross context virtual CPU structure of the calling
511 * thread.
512 * @param cbReq How many bytes are required.
513 */
514static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
515{
516 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
517 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
518 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
519
520
521 for (unsigned iIteration = 0;; iIteration++)
522 {
523 /*
524 * Adjust the request size so it'll fit the allocator alignment/whatnot.
525 *
526 * For the RTHeapSimple allocator this means to follow the logic described
527 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
528 * existing chunks if we think we've got sufficient free memory around.
529 *
530 * While for the alternative one we just align it up to a whole unit size.
531 */
532#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
533 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
534#else
535 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
536#endif
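 /* e.g. for the RTHeapSimple path with the 32 byte block header set up in
    iemExecMemAllocatorGrow: cbReq=200 -> RT_ALIGN_32(232, 64) - 32 = 224, so
    payload + header end exactly on a 64 byte line.  The alternative
    sub-allocator path simply gives RT_ALIGN_32(200, 128) = 256. */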
537 if (cbReq <= pExecMemAllocator->cbFree)
538 {
539 uint32_t const cChunks = pExecMemAllocator->cChunks;
540 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
541 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
548 {
549 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
550 if (pvRet)
551 return pvRet;
552 }
553 }
554
555 /*
556 * Can we grow it with another chunk?
557 */
558 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
559 {
560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
561 AssertLogRelRCReturn(rc, NULL);
562
563 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
564 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
565 if (pvRet)
566 return pvRet;
567 AssertFailed();
568 }
569
570 /*
571 * Try prune native TBs once.
572 */
573 if (iIteration == 0)
574 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
575 else
576 {
577 /** @todo stats... */
578 return NULL;
579 }
580 }
581
582}
583
584
585/** This is a hook that we may need later for changing memory protection back
586 * to readonly+exec */
587static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
588{
589#ifdef RT_OS_DARWIN
590 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
591 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
592 AssertRC(rc); RT_NOREF(pVCpu);
593
594 /*
595 * Flush the instruction cache:
596 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
597 */
598 /* sys_dcache_flush(pv, cb); - not necessary */
599 sys_icache_invalidate(pv, cb);
600#else
601 RT_NOREF(pVCpu, pv, cb);
602#endif
603}
604
605
606/**
607 * Frees executable memory.
608 */
609void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
610{
611 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
612 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
613 Assert(pv);
614#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
615 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
616#else
617 Assert(!((uintptr_t)pv & 63));
618#endif
619
620 /* Align the size as we did when allocating the block. */
621#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
622 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
623#else
624 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
625#endif
626
627 /* Free it / assert sanity. */
628#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
629 uint32_t const cChunks = pExecMemAllocator->cChunks;
630 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
631 bool fFound = false;
632 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
633 {
634 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
635 fFound = offChunk < cbChunk;
636 if (fFound)
637 {
638#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
639 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
640 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
641
642 /* Check that it's valid and free it. */
643 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
644 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
645 for (uint32_t i = 1; i < cReqUnits; i++)
646 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
647 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
648
649 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
650 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
651
652 /* Update the stats. */
653 pExecMemAllocator->cbAllocated -= cb;
654 pExecMemAllocator->cbFree += cb;
655 pExecMemAllocator->cAllocations -= 1;
656 return;
657#else
658 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
659 break;
660#endif
661 }
662 }
663# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
664 AssertFailed();
665# else
666 Assert(fFound);
667# endif
668#endif
669
670#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
671 /* Update stats while cb is freshly calculated. */
672 pExecMemAllocator->cbAllocated -= cb;
673 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
674 pExecMemAllocator->cAllocations -= 1;
675
676 /* Free it. */
677 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
678#endif
679}
680
681
682
683#ifdef IN_RING3
684# ifdef RT_OS_WINDOWS
685
686/**
687 * Initializes the unwind info structures for windows hosts.
688 */
689static int
690iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
691 void *pvChunk, uint32_t idxChunk)
692{
693 RT_NOREF(pVCpu);
694
695 /*
696 * The AMD64 unwind opcodes.
697 *
698 * This is a program that starts with RSP after a RET instruction that
699 * ends up in recompiled code, and the operations we describe here will
700 * restore all non-volatile registers and bring RSP back to where our
701 * RET address is. This means it's reverse order from what happens in
702 * the prologue.
703 *
704 * Note! Using a frame register approach here both because we have one
705 * and mainly because the UWOP_ALLOC_LARGE argument values
706 * would be a pain to write initializers for. On the positive
707 * side, we're impervious to changes in the stack variable
708 * area and can deal with dynamic stack allocations if necessary.
709 */
710 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
711 {
712 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
713 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
714 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
715 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
716 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
717 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
718 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
719 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
720 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
721 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
722 };
723 union
724 {
725 IMAGE_UNWIND_INFO Info;
726 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
727 } s_UnwindInfo =
728 {
729 {
730 /* .Version = */ 1,
731 /* .Flags = */ 0,
732 /* .SizeOfProlog = */ 16, /* whatever */
733 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
734 /* .FrameRegister = */ X86_GREG_xBP,
735 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
736 }
737 };
738 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
739 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
740
741 /*
742 * Calc how much space we need and allocate it off the exec heap.
743 */
744 unsigned const cFunctionEntries = 1;
745 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
746 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
747# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
749 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
750 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
751# else
752 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
753 - pExecMemAllocator->cbHeapBlockHdr;
754 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
755 32 /*cbAlignment*/);
756# endif
757 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
758 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
759
760 /*
761 * Initialize the structures.
762 */
763 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
764
765 paFunctions[0].BeginAddress = 0;
766 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
767 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
768
769 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
770 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
771
772 /*
773 * Register it.
774 */
775 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
776 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
777
778 return VINF_SUCCESS;
779}
780
781
782# else /* !RT_OS_WINDOWS */
783
784/**
785 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
786 */
787DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
788{
789 if (iValue >= 64)
790 {
791 Assert(iValue < 0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
794 }
795 else if (iValue >= 0)
796 *Ptr.pb++ = (uint8_t)iValue;
797 else if (iValue > -64)
798 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
799 else
800 {
801 Assert(iValue > -0x2000);
802 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
803 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
804 }
805 return Ptr;
806}
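/* Example encodings: 1 -> 0x01, -8 -> 0x78 (the data alignment factor used
   below), 100 -> 0xe4 0x00. */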
807
808
809/**
810 * Emits an ULEB128 encoded value (up to 64-bit wide).
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
813{
814 while (uValue >= 0x80)
815 {
816 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
817 uValue >>= 7;
818 }
819 *Ptr.pb++ = (uint8_t)uValue;
820 return Ptr;
821}
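/* Example encodings: 0x7f -> 0x7f, 300 -> 0xac 0x02. */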
822
823
824/**
825 * Emits a CFA rule as register @a uReg + offset @a off.
826 */
827DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
828{
829 *Ptr.pb++ = DW_CFA_def_cfa;
830 Ptr = iemDwarfPutUleb128(Ptr, uReg);
831 Ptr = iemDwarfPutUleb128(Ptr, off);
832 return Ptr;
833}
834
835
836/**
837 * Emits a register (@a uReg) save location:
838 * CFA + @a off * data_alignment_factor
839 */
840DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
841{
842 if (uReg < 0x40)
843 *Ptr.pb++ = DW_CFA_offset | uReg;
844 else
845 {
846 *Ptr.pb++ = DW_CFA_offset_extended;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 }
849 Ptr = iemDwarfPutUleb128(Ptr, off);
850 return Ptr;
851}
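/* With the data alignment factor of -8 emitted in the CIE below, an offset of
   e.g. 2 means the register was saved at CFA - 16 (see the RBP/BP entries below). */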
852
853
854# if 0 /* unused */
855/**
856 * Emits a register (@a uReg) save location, using signed offset:
857 * CFA + @a offSigned * data_alignment_factor
858 */
859DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
860{
861 *Ptr.pb++ = DW_CFA_offset_extended_sf;
862 Ptr = iemDwarfPutUleb128(Ptr, uReg);
863 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
864 return Ptr;
865}
866# endif
867
868
869/**
870 * Initializes the unwind info section for non-windows hosts.
871 */
872static int
873iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
874 void *pvChunk, uint32_t idxChunk)
875{
876 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
877 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
878
879 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
880
881 /*
882 * Generate the CIE first.
883 */
884# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
885 uint8_t const iDwarfVer = 3;
886# else
887 uint8_t const iDwarfVer = 4;
888# endif
889 RTPTRUNION const PtrCie = Ptr;
890 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
891 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
892 *Ptr.pb++ = iDwarfVer; /* DWARF version */
893 *Ptr.pb++ = 0; /* Augmentation. */
894 if (iDwarfVer >= 4)
895 {
896 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
897 *Ptr.pb++ = 0; /* Segment selector size. */
898 }
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
901# else
902 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
903# endif
904 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
905# ifdef RT_ARCH_AMD64
906 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
907# elif defined(RT_ARCH_ARM64)
908 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
909# else
910# error "port me"
911# endif
912 /* Initial instructions: */
913# ifdef RT_ARCH_AMD64
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
922# elif defined(RT_ARCH_ARM64)
923# if 1
924 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
925# else
926 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
927# endif
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
936 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
937 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
938 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
939 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
940 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
941 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
942# else
943# error "port me"
944# endif
945 while ((Ptr.u - PtrCie.u) & 3)
946 *Ptr.pb++ = DW_CFA_nop;
947 /* Finalize the CIE size. */
948 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
949
950 /*
951 * Generate an FDE for the whole chunk area.
952 */
953# ifdef IEMNATIVE_USE_LIBUNWIND
954 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
955# endif
956 RTPTRUNION const PtrFde = Ptr;
957 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
958 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
959 Ptr.pu32++;
960 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
961 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
962# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
963 *Ptr.pb++ = DW_CFA_nop;
964# endif
965 while ((Ptr.u - PtrFde.u) & 3)
966 *Ptr.pb++ = DW_CFA_nop;
967 /* Finalize the FDE size. */
968 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
969
970 /* Terminator entry. */
971 *Ptr.pu32++ = 0;
972 *Ptr.pu32++ = 0; /* just to be sure... */
973 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
974
975 /*
976 * Register it.
977 */
978# ifdef IEMNATIVE_USE_LIBUNWIND
979 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
980# else
981 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
982 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
983# endif
984
985# ifdef IEMNATIVE_USE_GDB_JIT
986 /*
987 * Now for telling GDB about this (experimental).
988 *
989 * This seems to work best with ET_DYN.
990 */
991 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
992# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
993 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
995# else
996 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
997 - pExecMemAllocator->cbHeapBlockHdr;
998 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
999# endif
1000 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1001 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1002
1003 RT_ZERO(*pSymFile);
1004
1005 /*
1006 * The ELF header:
1007 */
1008 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1009 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1010 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1011 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1012 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1013 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1014 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1015 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1016# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1017 pSymFile->EHdr.e_type = ET_DYN;
1018# else
1019 pSymFile->EHdr.e_type = ET_REL;
1020# endif
1021# ifdef RT_ARCH_AMD64
1022 pSymFile->EHdr.e_machine = EM_AMD64;
1023# elif defined(RT_ARCH_ARM64)
1024 pSymFile->EHdr.e_machine = EM_AARCH64;
1025# else
1026# error "port me"
1027# endif
1028 pSymFile->EHdr.e_version = 1; /*?*/
1029 pSymFile->EHdr.e_entry = 0;
1030# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1031 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1032# else
1033 pSymFile->EHdr.e_phoff = 0;
1034# endif
1035 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1036 pSymFile->EHdr.e_flags = 0;
1037 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1038# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1039 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1040 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1041# else
1042 pSymFile->EHdr.e_phentsize = 0;
1043 pSymFile->EHdr.e_phnum = 0;
1044# endif
1045 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1046 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1047 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1048
1049 uint32_t offStrTab = 0;
1050#define APPEND_STR(a_szStr) do { \
1051 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1052 offStrTab += sizeof(a_szStr); \
1053 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1054 } while (0)
1055#define APPEND_STR_FMT(a_szStr, ...) do { \
1056 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1057 offStrTab++; \
1058 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1059 } while (0)
1060
1061 /*
1062 * Section headers.
1063 */
1064 /* Section header #0: NULL */
1065 unsigned i = 0;
1066 APPEND_STR("");
1067 RT_ZERO(pSymFile->aShdrs[i]);
1068 i++;
1069
1070 /* Section header: .eh_frame */
1071 pSymFile->aShdrs[i].sh_name = offStrTab;
1072 APPEND_STR(".eh_frame");
1073 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1074 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1075# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1076 pSymFile->aShdrs[i].sh_offset
1077 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1078# else
1079 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1080 pSymFile->aShdrs[i].sh_offset = 0;
1081# endif
1082
1083 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1084 pSymFile->aShdrs[i].sh_link = 0;
1085 pSymFile->aShdrs[i].sh_info = 0;
1086 pSymFile->aShdrs[i].sh_addralign = 1;
1087 pSymFile->aShdrs[i].sh_entsize = 0;
1088 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1089 i++;
1090
1091 /* Section header: .shstrtab */
1092 unsigned const iShStrTab = i;
1093 pSymFile->EHdr.e_shstrndx = iShStrTab;
1094 pSymFile->aShdrs[i].sh_name = offStrTab;
1095 APPEND_STR(".shstrtab");
1096 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1097 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1098# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1099 pSymFile->aShdrs[i].sh_offset
1100 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1101# else
1102 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1103 pSymFile->aShdrs[i].sh_offset = 0;
1104# endif
1105 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1106 pSymFile->aShdrs[i].sh_link = 0;
1107 pSymFile->aShdrs[i].sh_info = 0;
1108 pSymFile->aShdrs[i].sh_addralign = 1;
1109 pSymFile->aShdrs[i].sh_entsize = 0;
1110 i++;
1111
1112 /* Section header: .symtab */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".symtab");
1115 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1124 i++;
1125
1126# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1127 /* Section header: .dynsym */
1128 pSymFile->aShdrs[i].sh_name = offStrTab;
1129 APPEND_STR(".dynsym");
1130 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1131 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1132 pSymFile->aShdrs[i].sh_offset
1133 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1134 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1135 pSymFile->aShdrs[i].sh_link = iShStrTab;
1136 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1137 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1138 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1139 i++;
1140# endif
1141
1142# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1143 /* Section header: .dynamic */
1144 pSymFile->aShdrs[i].sh_name = offStrTab;
1145 APPEND_STR(".dynamic");
1146 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1147 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1148 pSymFile->aShdrs[i].sh_offset
1149 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1150 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1151 pSymFile->aShdrs[i].sh_link = iShStrTab;
1152 pSymFile->aShdrs[i].sh_info = 0;
1153 pSymFile->aShdrs[i].sh_addralign = 1;
1154 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1155 i++;
1156# endif
1157
1158 /* Section header: .text */
1159 unsigned const iShText = i;
1160 pSymFile->aShdrs[i].sh_name = offStrTab;
1161 APPEND_STR(".text");
1162 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1163 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1164# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1165 pSymFile->aShdrs[i].sh_offset
1166 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1167# else
1168 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1169 pSymFile->aShdrs[i].sh_offset = 0;
1170# endif
1171 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1172 pSymFile->aShdrs[i].sh_link = 0;
1173 pSymFile->aShdrs[i].sh_info = 0;
1174 pSymFile->aShdrs[i].sh_addralign = 1;
1175 pSymFile->aShdrs[i].sh_entsize = 0;
1176 i++;
1177
1178 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1179
1180# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1181 /*
1182 * The program headers:
1183 */
1184 /* Everything in a single LOAD segment: */
1185 i = 0;
1186 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1187 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = 0;
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1193 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1194 i++;
1195 /* The .dynamic segment. */
1196 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1197 pSymFile->aPhdrs[i].p_flags = PF_R;
1198 pSymFile->aPhdrs[i].p_offset
1199 = pSymFile->aPhdrs[i].p_vaddr
1200 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1201 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1202 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1203 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1204 i++;
1205
1206 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1207
1208 /*
1209 * The dynamic section:
1210 */
1211 i = 0;
1212 pSymFile->aDyn[i].d_tag = DT_SONAME;
1213 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1214 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1223 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1224 i++;
1225 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1226 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1227 i++;
1228 pSymFile->aDyn[i].d_tag = DT_NULL;
1229 i++;
1230 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1231# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1232
1233 /*
1234 * Symbol tables:
1235 */
1236 /** @todo gdb doesn't seem to really like this ... */
1237 i = 0;
1238 pSymFile->aSymbols[i].st_name = 0;
1239 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1240 pSymFile->aSymbols[i].st_value = 0;
1241 pSymFile->aSymbols[i].st_size = 0;
1242 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1243 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1244# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1245 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1246# endif
1247 i++;
1248
1249 pSymFile->aSymbols[i].st_name = 0;
1250 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1251 pSymFile->aSymbols[i].st_value = 0;
1252 pSymFile->aSymbols[i].st_size = 0;
1253 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1254 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1255 i++;
1256
1257 pSymFile->aSymbols[i].st_name = offStrTab;
1258 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1259# if 0
1260 pSymFile->aSymbols[i].st_shndx = iShText;
1261 pSymFile->aSymbols[i].st_value = 0;
1262# else
1263 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1264 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1265# endif
1266 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1267 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1268 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1269# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1270 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1271 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1272# endif
1273 i++;
1274
1275 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1276 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1277
1278 /*
1279 * The GDB JIT entry and informing GDB.
1280 */
1281 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1282# if 1
1283 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1284# else
1285 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1286# endif
1287
1288 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1289 RTCritSectEnter(&g_IemNativeGdbJitLock);
1290 pEhFrame->GdbJitEntry.pNext = NULL;
1291 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1292 if (__jit_debug_descriptor.pTail)
1293 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1294 else
1295 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1296 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1297 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1298
1299 /* Notify GDB: */
1300 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1301 __jit_debug_register_code();
1302 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1303 RTCritSectLeave(&g_IemNativeGdbJitLock);
1304
1305# else /* !IEMNATIVE_USE_GDB_JIT */
1306 RT_NOREF(pVCpu);
1307# endif /* !IEMNATIVE_USE_GDB_JIT */
1308
1309 return VINF_SUCCESS;
1310}
1311
1312# endif /* !RT_OS_WINDOWS */
1313#endif /* IN_RING3 */
1314
1315
1316/**
1317 * Adds another chunk to the executable memory allocator.
1318 *
1319 * This is used by the init code for the initial allocation and later by the
1320 * regular allocator function when it's out of memory.
1321 */
1322static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1323{
1324 /* Check that we've room for growth. */
1325 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1326 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1327
1328 /* Allocate a chunk. */
1329#ifdef RT_OS_DARWIN
1330 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1331#else
1332 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1333#endif
1334 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1335
1336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1337 int rc = VINF_SUCCESS;
1338#else
1339 /* Initialize the heap for the chunk. */
1340 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1341 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1342 AssertRC(rc);
1343 if (RT_SUCCESS(rc))
1344 {
1345 /*
1346 * We want the memory to be aligned on 64 byte, so the first time thru
1347 * here we do some exploratory allocations to see how we can achieve this.
1348 * On subsequent runs we only make an initial adjustment allocation, if
1349 * necessary.
1350 *
1351 * Since we own the heap implementation, we know that the internal block
1352 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1353 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1354 * to the size, align up by 64 bytes, and subtract 32 bytes.
1355 *
1356 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1357 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1358 * allocation to force subsequent allocations to return 64 byte aligned
1359 * user areas.
1360 */
1361 if (!pExecMemAllocator->cbHeapBlockHdr)
1362 {
1363 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1364 pExecMemAllocator->cbHeapAlignTweak = 64;
1365 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1366 32 /*cbAlignment*/);
1367 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1368
1369 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1376 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1377 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1378 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1379 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1380
1381 RTHeapSimpleFree(hHeap, pvTest2);
1382 RTHeapSimpleFree(hHeap, pvTest1);
1383 }
1384 else
1385 {
1386 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1387 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1388 }
1389 if (RT_SUCCESS(rc))
1390#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1391 {
1392 /*
1393 * Add the chunk.
1394 *
1395 * This must be done before the unwind init so windows can allocate
1396 * memory from the chunk when using the alternative sub-allocator.
1397 */
1398 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1399#ifdef IN_RING3
1400 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1401#endif
1402#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1403 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1404#else
1405 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1406 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1407 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1408 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1409#endif
1410
1411 pExecMemAllocator->cChunks = idxChunk + 1;
1412 pExecMemAllocator->idxChunkHint = idxChunk;
1413
1414#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1415 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1416 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1417#else
1418 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1419 pExecMemAllocator->cbTotal += cbFree;
1420 pExecMemAllocator->cbFree += cbFree;
1421#endif
1422
1423#ifdef IN_RING3
1424 /*
1425 * Initialize the unwind information (this cannot really fail atm).
1426 * (This sets pvUnwindInfo.)
1427 */
1428 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1429 if (RT_SUCCESS(rc))
1430#endif
1431 {
1432 return VINF_SUCCESS;
1433 }
1434
1435#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1436 /* Just in case the impossible happens, undo the above: */
1437 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1438 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1439 pExecMemAllocator->cChunks = idxChunk;
1440 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1441 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1442 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1443 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1444#endif
1445 }
1446#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1447 }
1448#endif
1449 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1450 RT_NOREF(pVCpu);
1451 return rc;
1452}
1453
1454
1455/**
1456 * Initializes the executable memory allocator for native recompilation on the
1457 * calling EMT.
1458 *
1459 * @returns VBox status code.
1460 * @param pVCpu The cross context virtual CPU structure of the calling
1461 * thread.
1462 * @param cbMax The max size of the allocator.
1463 * @param cbInitial The initial allocator size.
1464 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1465 * dependent).
1466 */
1467int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1468{
1469 /*
1470 * Validate input.
1471 */
1472 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1473 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1474 AssertLogRelMsgReturn( cbChunk != UINT32_MAX
1475 || cbChunk == 0
1476 || ( RT_IS_POWER_OF_TWO(cbChunk)
1477 && cbChunk >= _1M
1478 && cbChunk <= _256M
1479 && cbChunk <= cbMax),
1480 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1481 VERR_OUT_OF_RANGE);
1482
1483 /*
1484 * Adjust/figure out the chunk size.
1485 */
1486 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1487 {
1488 if (cbMax >= _256M)
1489 cbChunk = _64M;
1490 else
1491 {
1492 if (cbMax < _16M)
1493 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1494 else
1495 cbChunk = (uint32_t)cbMax / 4;
1496 if (!RT_IS_POWER_OF_TWO(cbChunk))
1497 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1498 }
1499 }
1500
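    /* Round cbMax up to a whole number of chunks so that it is always an exact multiple of
       cbChunk (the assertion below double checks this). */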
1501 if (cbChunk > cbMax)
1502 cbMax = cbChunk;
1503 else
1504 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1505 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1506 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1507
1508 /*
1509     * Allocate and initialize the allocator instance.
1510 */
1511 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1512#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
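    /* The allocation bitmaps are appended to the allocator structure: one bit per
       2^IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT byte allocation unit and eight bits to the byte,
       hence the '+ 3' in the cbBitmap shift below. */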
1513 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1514 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1515 cbNeeded += cbBitmap * cMaxChunks;
1516 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1517 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1518#endif
1519#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1520 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1521 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1522#endif
1523 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1524 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1525 VERR_NO_MEMORY);
1526 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1527 pExecMemAllocator->cbChunk = cbChunk;
1528 pExecMemAllocator->cMaxChunks = cMaxChunks;
1529 pExecMemAllocator->cChunks = 0;
1530 pExecMemAllocator->idxChunkHint = 0;
1531 pExecMemAllocator->cAllocations = 0;
1532 pExecMemAllocator->cbTotal = 0;
1533 pExecMemAllocator->cbFree = 0;
1534 pExecMemAllocator->cbAllocated = 0;
1535#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1536 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1537 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1538 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1539 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1540#endif
1541#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1542 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1543#endif
1544 for (uint32_t i = 0; i < cMaxChunks; i++)
1545 {
1546#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1547 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1548 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1549#else
1550 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1551#endif
1552 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1553#ifdef IN_RING0
1554 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1555#else
1556 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1557#endif
1558 }
1559 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1560
1561 /*
1562 * Do the initial allocations.
1563 */
1564    while (cbInitial > (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1565 {
1566 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1567 AssertLogRelRCReturn(rc, rc);
1568 }
1569
1570 pExecMemAllocator->idxChunkHint = 0;
1571
1572 return VINF_SUCCESS;
1573}
1574
1575
1576/*********************************************************************************************************************************
1577* Native Recompilation *
1578*********************************************************************************************************************************/
1579
1580
1581/**
1582 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1585{
1586 pVCpu->iem.s.cInstructions += idxInstr;
1587 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1588}
1589
1590
1591/**
1592 * Used by TB code when it wants to raise a \#GP(0).
1593 */
1594IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1595{
1596 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1597#ifndef _MSC_VER
1598 return VINF_IEM_RAISED_XCPT; /* not reached */
1599#endif
1600}
1601
1602
1603/**
1604 * Used by TB code when it wants to raise a \#NM.
1605 */
1606IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1607{
1608 iemRaiseDeviceNotAvailableJmp(pVCpu);
1609#ifndef _MSC_VER
1610 return VINF_IEM_RAISED_XCPT; /* not reached */
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code when it wants to raise a \#UD.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1619{
1620 iemRaiseUndefinedOpcodeJmp(pVCpu);
1621#ifndef _MSC_VER
1622 return VINF_IEM_RAISED_XCPT; /* not reached */
1623#endif
1624}
1625
1626
1627/**
1628 * Used by TB code when it wants to raise a \#MF.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1631{
1632 iemRaiseMathFaultJmp(pVCpu);
1633#ifndef _MSC_VER
1634 return VINF_IEM_RAISED_XCPT; /* not reached */
1635#endif
1636}
1637
1638
1639/**
1640 * Used by TB code when it wants to raise a \#XF.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1643{
1644 iemRaiseSimdFpExceptionJmp(pVCpu);
1645#ifndef _MSC_VER
1646 return VINF_IEM_RAISED_XCPT; /* not reached */
1647#endif
1648}
1649
1650
1651/**
1652 * Used by TB code when detecting opcode changes.
1653 * @see iemThreadeFuncWorkerObsoleteTb
1654 */
1655IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1656{
1657    /* We set fSafeToFree to false because we're being called in the context
1658       of a TB callback function, which for native TBs means we cannot release
1659       the executable memory till we've returned all the way back to iemTbExec,
1660       as that return path goes via the native code generated for the TB. */
1661 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1662 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1663 return VINF_IEM_REEXEC_BREAK;
1664}
1665
1666
1667/**
1668 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1669 */
1670IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1671{
1672 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1673 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1674 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1675 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1676 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1677 return VINF_IEM_REEXEC_BREAK;
1678}
1679
1680
1681/**
1682 * Used by TB code when we missed a PC check after a branch.
1683 */
1684IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1685{
1686 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1687 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1688 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1689 pVCpu->iem.s.pbInstrBuf));
1690 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1691 return VINF_IEM_REEXEC_BREAK;
1692}
1693
1694
1695
1696/*********************************************************************************************************************************
1697* Helpers: Segmented memory fetches and stores. *
1698*********************************************************************************************************************************/
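/*
 * Note: When the recompiler emits inline TLB lookups (IEMNATIVE_WITH_TLB_LOOKUP_FETCH, _STORE,
 *       _PUSH and _POP), these helpers should only be reached on the fallback path and therefore
 *       go straight to the Safe variants; without inline lookups they call the regular Jmp
 *       variants, which do the TLB work themselves.
 */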
1699
1700/**
1701 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1706 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1707#else
1708 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/**
1714 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1715 * to 16 bits.
1716 */
1717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1718{
1719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1720 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1721#else
1722 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1723#endif
1724}
1725
1726
1727/**
1728 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1729 * to 32 bits.
1730 */
1731IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1732{
1733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1734 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1735#else
1736 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1737#endif
1738}
1739
1740/**
1741 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1742 * to 64 bits.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1745{
1746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1747 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1748#else
1749 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1750#endif
1751}
1752
1753
1754/**
1755 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1760 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1761#else
1762 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1769 * to 32 bits.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1772{
1773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1774 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1775#else
1776 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1777#endif
1778}
1779
1780
1781/**
1782 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1783 * to 64 bits.
1784 */
1785IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1786{
1787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1788 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1789#else
1790 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1791#endif
1792}
1793
1794
1795/**
1796 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1799{
1800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1801 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1802#else
1803 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1804#endif
1805}
1806
1807
1808/**
1809 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1810 * to 64 bits.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1828 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1829#else
1830 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1831#endif
1832}
1833
1834
1835/**
1836 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1837 */
1838IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1839{
1840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1841 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1842#else
1843 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1844#endif
1845}
1846
1847
1848/**
1849 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1850 */
1851IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1852{
1853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1854 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1855#else
1856 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1857#endif
1858}
1859
1860
1861/**
1862 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1865{
1866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1867 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1868#else
1869 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1870#endif
1871}
1872
1873
1874/**
1875 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1880 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1881#else
1882 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1883#endif
1884}
1885
1886
1887
1888/**
1889 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1894 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1895#else
1896 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1897#endif
1898}
1899
1900
1901/**
1902 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1907 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1908#else
1909 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to store a 32-bit selector value onto a generic stack.
1916 *
1917 * Intel CPUs don't write a whole dword, thus the special function.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1922 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1923#else
1924 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1925#endif
1926}
1927
1928
1929/**
1930 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1935 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1936#else
1937 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1948 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1949#else
1950 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1957 */
1958IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1959{
1960#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1961 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1962#else
1963 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1964#endif
1965}
1966
1967
1968/**
1969 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1970 */
1971IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1972{
1973#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1974 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1975#else
1976 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1977#endif
1978}
1979
1980
1981
1982/*********************************************************************************************************************************
1983* Helpers: Flat memory fetches and stores. *
1984*********************************************************************************************************************************/
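/*
 * Note: The flat variants pass UINT8_MAX as the segment register index to the common Safe
 *       workers; when inline TLB lookups are not used they call the dedicated Flat Jmp
 *       workers instead.
 */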
1985
1986/**
1987 * Used by TB code to load unsigned 8-bit data w/ flat address.
1988 * @note Zero extending the value to 64-bit to simplify assembly.
1989 */
1990IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1991{
1992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1993 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1994#else
1995 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1996#endif
1997}
1998
1999
2000/**
2001 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2002 * to 16 bits.
2003 * @note Zero extending the value to 64-bit to simplify assembly.
2004 */
2005IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2006{
2007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2009#else
2010 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2011#endif
2012}
2013
2014
2015/**
2016 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2017 * to 32 bits.
2018 * @note Zero extending the value to 64-bit to simplify assembly.
2019 */
2020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2021{
2022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2024#else
2025 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2026#endif
2027}
2028
2029
2030/**
2031 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2032 * to 64 bits.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2035{
2036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2037 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2038#else
2039 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2040#endif
2041}
2042
2043
2044/**
2045 * Used by TB code to load unsigned 16-bit data w/ flat address.
2046 * @note Zero extending the value to 64-bit to simplify assembly.
2047 */
2048IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2049{
2050#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2051 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2052#else
2053 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2054#endif
2055}
2056
2057
2058/**
2059 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2060 * to 32 bits.
2061 * @note Zero extending the value to 64-bit to simplify assembly.
2062 */
2063IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2064{
2065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2067#else
2068 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2069#endif
2070}
2071
2072
2073/**
2074 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2075 * to 64 bits.
2076 * @note Zero extending the value to 64-bit to simplify assembly.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2081 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2082#else
2083 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to load unsigned 32-bit data w/ flat address.
2090 * @note Zero extending the value to 64-bit to simplify assembly.
2091 */
2092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2093{
2094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2095 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2096#else
2097 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2098#endif
2099}
2100
2101
2102/**
2103 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2104 * to 64 bits.
2105 * @note Zero extending the value to 64-bit to simplify assembly.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2110 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2111#else
2112 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to load unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2123 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2124#else
2125 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2126#endif
2127}
2128
2129
2130/**
2131 * Used by TB code to store unsigned 8-bit data w/ flat address.
2132 */
2133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2134{
2135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2136 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2137#else
2138 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2139#endif
2140}
2141
2142
2143/**
2144 * Used by TB code to store unsigned 16-bit data w/ flat address.
2145 */
2146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2147{
2148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2149 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2150#else
2151 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2152#endif
2153}
2154
2155
2156/**
2157 * Used by TB code to store unsigned 32-bit data w/ flat address.
2158 */
2159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2160{
2161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2162 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2163#else
2164 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2165#endif
2166}
2167
2168
2169/**
2170 * Used by TB code to store unsigned 64-bit data w/ flat address.
2171 */
2172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2173{
2174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2175 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2176#else
2177 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2178#endif
2179}
2180
2181
2182
2183/**
2184 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2189 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2190#else
2191 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2202 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2203#else
2204 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2205#endif
2206}
2207
2208
2209/**
2210 * Used by TB code to store a segment selector value onto a flat stack.
2211 *
2212 * Intel CPUs don't write a whole dword, thus the special function.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2217 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2218#else
2219 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2220#endif
2221}
2222
2223
2224/**
2225 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2226 */
2227IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2228{
2229#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2230 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2231#else
2232 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2233#endif
2234}
2235
2236
2237/**
2238 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2239 */
2240IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2241{
2242#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2243 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2244#else
2245 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2246#endif
2247}
2248
2249
2250/**
2251 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2252 */
2253IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2254{
2255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2256 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2257#else
2258 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2259#endif
2260}
2261
2262
2263/**
2264 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2269 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2270#else
2271 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2272#endif
2273}
2274
2275
2276
2277/*********************************************************************************************************************************
2278* Helpers: Segmented memory mapping. *
2279*********************************************************************************************************************************/
2280
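/*
 * Note: Each mapping helper returns a pointer to the mapped guest data and stores the unmap
 *       bookkeeping byte in *pbUnmapInfo; that byte is later passed to the commit-and-unmap
 *       helpers further down.
 */
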
2281/**
2282 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2283 * segmentation.
2284 */
2285IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2286 RTGCPTR GCPtrMem, uint8_t iSegReg))
2287{
2288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2289 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#else
2291 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#endif
2293}
2294
2295
2296/**
2297 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2298 */
2299IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2300 RTGCPTR GCPtrMem, uint8_t iSegReg))
2301{
2302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2303 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#else
2305 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#endif
2307}
2308
2309
2310/**
2311 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2312 */
2313IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2314 RTGCPTR GCPtrMem, uint8_t iSegReg))
2315{
2316#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2317 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2318#else
2319 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2320#endif
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2328 RTGCPTR GCPtrMem, uint8_t iSegReg))
2329{
2330#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2331 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2332#else
2333 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2334#endif
2335}
2336
2337
2338/**
2339 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2340 * segmentation.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2343 RTGCPTR GCPtrMem, uint8_t iSegReg))
2344{
2345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2346 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#else
2348 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#endif
2350}
2351
2352
2353/**
2354 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2355 */
2356IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2357 RTGCPTR GCPtrMem, uint8_t iSegReg))
2358{
2359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2360 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#else
2362 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#endif
2364}
2365
2366
2367/**
2368 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2369 */
2370IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2371 RTGCPTR GCPtrMem, uint8_t iSegReg))
2372{
2373#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2374 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2375#else
2376 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2377#endif
2378}
2379
2380
2381/**
2382 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2383 */
2384IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2385 RTGCPTR GCPtrMem, uint8_t iSegReg))
2386{
2387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2388 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2389#else
2390 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2391#endif
2392}
2393
2394
2395/**
2396 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2397 * segmentation.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2400 RTGCPTR GCPtrMem, uint8_t iSegReg))
2401{
2402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2403 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#else
2405 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#endif
2407}
2408
2409
2410/**
2411 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2412 */
2413IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2414 RTGCPTR GCPtrMem, uint8_t iSegReg))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2417 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#else
2419 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2426 */
2427IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2428 RTGCPTR GCPtrMem, uint8_t iSegReg))
2429{
2430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2431 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2432#else
2433 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2434#endif
2435}
2436
2437
2438/**
2439 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2440 */
2441IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2442 RTGCPTR GCPtrMem, uint8_t iSegReg))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2445 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2446#else
2447 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2454 * segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/**
2468 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2469 */
2470IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2471 RTGCPTR GCPtrMem, uint8_t iSegReg))
2472{
2473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2474 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#else
2476 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#endif
2478}
2479
2480
2481/**
2482 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2483 */
2484IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2485 RTGCPTR GCPtrMem, uint8_t iSegReg))
2486{
2487#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2488 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#else
2490 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#endif
2492}
2493
2494
2495/**
2496 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2497 */
2498IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2499 RTGCPTR GCPtrMem, uint8_t iSegReg))
2500{
2501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2502 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#else
2504 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#endif
2506}
2507
2508
2509/**
2510 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2511 */
2512IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2513 RTGCPTR GCPtrMem, uint8_t iSegReg))
2514{
2515#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2516 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2517#else
2518 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2519#endif
2520}
2521
2522
2523/**
2524 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2525 */
2526IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2527 RTGCPTR GCPtrMem, uint8_t iSegReg))
2528{
2529#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2530 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2531#else
2532 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2533#endif
2534}
2535
2536
2537/**
2538 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2539 * segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/**
2567 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2568 */
2569IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2570 RTGCPTR GCPtrMem, uint8_t iSegReg))
2571{
2572#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2573 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2574#else
2575 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2576#endif
2577}
2578
2579
2580/**
2581 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2582 */
2583IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2584 RTGCPTR GCPtrMem, uint8_t iSegReg))
2585{
2586#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2587 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2588#else
2589 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2590#endif
2591}
2592
2593
2594/*********************************************************************************************************************************
2595* Helpers: Flat memory mapping. *
2596*********************************************************************************************************************************/
2597
2598/**
2599 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2600 * address.
2601 */
2602IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2603{
2604#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2605 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2606#else
2607 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2608#endif
2609}
2610
2611
2612/**
2613 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2614 */
2615IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2616{
2617#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2618 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2619#else
2620 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2621#endif
2622}
2623
2624
2625/**
2626 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2627 */
2628IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2629{
2630#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2631 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2632#else
2633 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2634#endif
2635}
2636
2637
2638/**
2639 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2640 */
2641IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2642{
2643#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2644 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2645#else
2646 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2647#endif
2648}
2649
2650
2651/**
2652 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2653 * address.
2654 */
2655IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2656{
2657#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2658 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2659#else
2660 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2661#endif
2662}
2663
2664
2665/**
2666 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2667 */
2668IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2672#else
2673 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2682{
2683#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2684 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2685#else
2686 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2687#endif
2688}
2689
2690
2691/**
2692 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2693 */
2694IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2695{
2696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2697 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2698#else
2699 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2700#endif
2701}
2702
2703
2704/**
2705 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2706 * address.
2707 */
2708IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2709{
2710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2711 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2712#else
2713 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2714#endif
2715}
2716
2717
2718/**
2719 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2722{
2723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2724 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2725#else
2726 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2727#endif
2728}
2729
2730
2731/**
2732 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2733 */
2734IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2735{
2736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2737 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2738#else
2739 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2740#endif
2741}
2742
2743
2744/**
2745 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2746 */
2747IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2748{
2749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2750 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2751#else
2752 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2753#endif
2754}
2755
2756
2757/**
2758 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2759 * address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2786 */
2787IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2788{
2789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2790 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2791#else
2792 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2793#endif
2794}
2795
2796
2797/**
2798 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2799 */
2800IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2801{
2802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2803 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2804#else
2805 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2806#endif
2807}
2808
2809
2810/**
2811 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2812 */
2813IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2814{
2815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2816 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2817#else
2818 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2819#endif
2820}
2821
2822
2823/**
2824 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2825 */
2826IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2827{
2828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2829 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2830#else
2831 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2832#endif
2833}
2834
2835
2836/**
2837 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2838 * address.
2839 */
2840IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2841{
2842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2843 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2844#else
2845 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2846#endif
2847}
2848
2849
2850/**
2851 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2857#else
2858 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2867{
2868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2869 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2870#else
2871 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2872#endif
2873}
2874
2875
2876/**
2877 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2878 */
2879IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2880{
2881#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2882 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2883#else
2884 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2885#endif
2886}
2887
2888
2889/*********************************************************************************************************************************
2890* Helpers: Commit, rollback & unmap *
2891*********************************************************************************************************************************/
2892
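/*
 * Note: The bUnmapInfo argument is the byte the corresponding mapping helper wrote via its
 *       pbUnmapInfo parameter; there is one unmap flavour per mapping access type (atomic,
 *       read-write, write-only and read-only).
 */
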
2893/**
2894 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2895 */
2896IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2897{
2898 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2899}
2900
2901
2902/**
2903 * Used by TB code to commit and unmap a read-write memory mapping.
2904 */
2905IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2906{
2907 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2908}
2909
2910
2911/**
2912 * Used by TB code to commit and unmap a write-only memory mapping.
2913 */
2914IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2915{
2916 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2917}
2918
2919
2920/**
2921 * Used by TB code to commit and unmap a read-only memory mapping.
2922 */
2923IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2924{
2925 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2926}
2927
2928
2929/**
2930 * Reinitializes the native recompiler state.
2931 *
2932 * Called before starting a new recompile job.
2933 */
2934static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2935{
2936 pReNative->cLabels = 0;
2937 pReNative->bmLabelTypes = 0;
2938 pReNative->cFixups = 0;
2939#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2940 pReNative->pDbgInfo->cEntries = 0;
2941#endif
2942 pReNative->pTbOrg = pTb;
2943 pReNative->cCondDepth = 0;
2944 pReNative->uCondSeqNo = 0;
2945 pReNative->uCheckIrqSeqNo = 0;
2946 pReNative->uTlbSeqNo = 0;
2947
2948#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2949 pReNative->Core.offPc = 0;
2950 pReNative->Core.cInstrPcUpdateSkipped = 0;
2951#endif
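    /* Start out with the fixed registers marked as allocated, plus any register indexes the
       host architecture does not actually have. */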
2952 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2953#if IEMNATIVE_HST_GREG_COUNT < 32
2954 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2955#endif
2956 ;
2957 pReNative->Core.bmHstRegsWithGstShadow = 0;
2958 pReNative->Core.bmGstRegShadows = 0;
2959 pReNative->Core.bmVars = 0;
2960 pReNative->Core.bmStack = 0;
2961 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2962 pReNative->Core.u64ArgVars = UINT64_MAX;
2963
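    /* Label types below kIemNativeLabelType_FirstWithMultipleInstances occur at most once per
       TB; UINT32_MAX means the label hasn't been created yet (see iemNativeLabelCreate). */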
2964 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2965 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2968 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2969 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2970 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2971 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2972 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2973 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2974 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2975 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2976 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2977 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2978
2979 /* Full host register reinit: */
2980 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2981 {
2982 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2983 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2984 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2985 }
2986
2987 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2988 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2989#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2990 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2991#endif
2992#ifdef IEMNATIVE_REG_FIXED_TMP0
2993 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2994#endif
2995#ifdef IEMNATIVE_REG_FIXED_TMP1
2996 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2997#endif
2998#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2999 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3000#endif
3001 );
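    /* Mark the remaining fixed registers (those without a dedicated role) as reserved; the
       dedicated ones (pVCpu, CPUMCTX, temporaries, PC shadow) get their roles right below. */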
3002 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3003 {
3004 fRegs &= ~RT_BIT_32(idxReg);
3005        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3006 }
3007
3008 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3009#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3010 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3011#endif
3012#ifdef IEMNATIVE_REG_FIXED_TMP0
3013 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3014#endif
3015#ifdef IEMNATIVE_REG_FIXED_TMP1
3016 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3017#endif
3018#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3019 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3020#endif
3021
3022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3023# ifdef RT_ARCH_ARM64
3024    /*
3025     * ARM64 only has 32 128-bit registers.  To support emulating 256-bit registers we statically pair two real
3026     * registers into one virtual register for now, leaving us with only 16 256-bit registers.  We always pair
3027     * v0 with v1, v2 with v3, etc., so we mark the higher register of each pair as fixed here during init (hence
3028     * the 0xaaaaaaaa mask below), and the register allocator assumes it is always free when the lower is picked.
3029     */
3030 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3031# else
3032 uint32_t const fFixedAdditional = 0;
3033# endif
3034
3035 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3036 | fFixedAdditional
3037# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3038 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3039# endif
3040 ;
3041 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3042 pReNative->Core.bmGstSimdRegShadows = 0;
3043 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3044 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3045
3046 /* Full host register reinit: */
3047 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3048 {
3049 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3050 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3051 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3052 }
3053
3054 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3055 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3056 {
3057 fRegs &= ~RT_BIT_32(idxReg);
3058 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3059 }
3060
3061#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3062 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3063#endif
3064
3065#endif
3066
3067 return pReNative;
3068}
3069
3070
3071/**
3072 * Allocates and initializes the native recompiler state.
3073 *
3074 * This is called the first time an EMT wants to recompile something.
3075 *
3076 * @returns Pointer to the new recompiler state.
3077 * @param pVCpu The cross context virtual CPU structure of the calling
3078 * thread.
3079 * @param pTb The TB that's about to be recompiled.
3080 * @thread EMT(pVCpu)
3081 */
3082static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3083{
3084 VMCPU_ASSERT_EMT(pVCpu);
3085
3086 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3087 AssertReturn(pReNative, NULL);
3088
3089 /*
3090 * Try allocate all the buffers and stuff we need.
3091 */
3092 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3093 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3094 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3095#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3096 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3097#endif
3098 if (RT_LIKELY( pReNative->pInstrBuf
3099 && pReNative->paLabels
3100 && pReNative->paFixups)
3101#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3102 && pReNative->pDbgInfo
3103#endif
3104 )
3105 {
3106 /*
3107 * Set the buffer & array sizes on success.
3108 */
3109 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3110 pReNative->cLabelsAlloc = _8K;
3111 pReNative->cFixupsAlloc = _16K;
3112#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3113 pReNative->cDbgInfoAlloc = _16K;
3114#endif
3115
3116 /* Other constant stuff: */
3117 pReNative->pVCpu = pVCpu;
3118
3119 /*
3120 * Done, just need to save it and reinit it.
3121 */
3122 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3123 return iemNativeReInit(pReNative, pTb);
3124 }
3125
3126 /*
3127 * Failed. Cleanup and return.
3128 */
3129 AssertFailed();
3130 RTMemFree(pReNative->pInstrBuf);
3131 RTMemFree(pReNative->paLabels);
3132 RTMemFree(pReNative->paFixups);
3133#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3134 RTMemFree(pReNative->pDbgInfo);
3135#endif
3136 RTMemFree(pReNative);
3137 return NULL;
3138}
3139
3140
3141/**
3142 * Creates a label
3143 *
3144 * If the label does not yet have a defined position,
3145 * call iemNativeLabelDefine() later to set it.
3146 *
3147 * @returns Label ID. Throws VBox status code on failure, so no need to check
3148 * the return value.
3149 * @param pReNative The native recompile state.
3150 * @param enmType The label type.
3151 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3152 * label is not yet defined (default).
3153 * @param   uData       Data associated with the label. Only applicable to
3154 * certain type of labels. Default is zero.
3155 */
3156DECL_HIDDEN_THROW(uint32_t)
3157iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3158 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3159{
3160 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3161
3162 /*
3163 * Locate existing label definition.
3164 *
3165 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3166 * and uData is zero.
3167 */
3168 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3169 uint32_t const cLabels = pReNative->cLabels;
3170 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3171#ifndef VBOX_STRICT
3172 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3173 && offWhere == UINT32_MAX
3174 && uData == 0
3175#endif
3176 )
3177 {
3178#ifndef VBOX_STRICT
3179 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3180 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3181 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3182 if (idxLabel < pReNative->cLabels)
3183 return idxLabel;
3184#else
3185 for (uint32_t i = 0; i < cLabels; i++)
3186 if ( paLabels[i].enmType == enmType
3187 && paLabels[i].uData == uData)
3188 {
3189 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3190 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3191 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3192 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3193 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3194 return i;
3195 }
3196 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3197 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3198#endif
3199 }
3200
3201 /*
3202 * Make sure we've got room for another label.
3203 */
3204 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3205 { /* likely */ }
3206 else
3207 {
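        /* Grow by doubling.  We only get here when the array is completely full
           (cLabels == cLabelsAlloc), which the asserts below double-check. */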
3208 uint32_t cNew = pReNative->cLabelsAlloc;
3209 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3210 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3211 cNew *= 2;
3212        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3213 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3214 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3215 pReNative->paLabels = paLabels;
3216 pReNative->cLabelsAlloc = cNew;
3217 }
3218
3219 /*
3220 * Define a new label.
3221 */
3222 paLabels[cLabels].off = offWhere;
3223 paLabels[cLabels].enmType = enmType;
3224 paLabels[cLabels].uData = uData;
3225 pReNative->cLabels = cLabels + 1;
3226
3227 Assert((unsigned)enmType < 64);
3228 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3229
3230 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3231 {
3232 Assert(uData == 0);
3233 pReNative->aidxUniqueLabels[enmType] = cLabels;
3234 }
3235
3236 if (offWhere != UINT32_MAX)
3237 {
3238#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3239 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3240 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3241#endif
3242 }
3243 return cLabels;
3244}
3245
3246
3247/**
3248 * Defines the location of an existing label.
3249 *
3250 * @param pReNative The native recompile state.
3251 * @param idxLabel The label to define.
3252 * @param offWhere The position.
3253 */
3254DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3255{
3256 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3257 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3258 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3259 pLabel->off = offWhere;
3260#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3261 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3262 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3263#endif
3264}
3265
3266
3267/**
3268 * Looks up a label.
3269 *
3270 * @returns Label ID if found, UINT32_MAX if not.
3271 */
3272static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3273 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3274{
3275 Assert((unsigned)enmType < 64);
3276 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3277 {
3278 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3279 return pReNative->aidxUniqueLabels[enmType];
3280
3281 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3282 uint32_t const cLabels = pReNative->cLabels;
3283 for (uint32_t i = 0; i < cLabels; i++)
3284 if ( paLabels[i].enmType == enmType
3285 && paLabels[i].uData == uData
3286 && ( paLabels[i].off == offWhere
3287 || offWhere == UINT32_MAX
3288 || paLabels[i].off == UINT32_MAX))
3289 return i;
3290 }
3291 return UINT32_MAX;
3292}
3293
3294
3295/**
3296 * Adds a fixup.
3297 *
3298 * @throws VBox status code (int) on failure.
3299 * @param pReNative The native recompile state.
3300 * @param offWhere The instruction offset of the fixup location.
3301 * @param idxLabel The target label ID for the fixup.
3302 * @param enmType The fixup type.
3303 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3304 */
3305DECL_HIDDEN_THROW(void)
3306iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3307 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3308{
3309 Assert(idxLabel <= UINT16_MAX);
3310 Assert((unsigned)enmType <= UINT8_MAX);
3311
3312 /*
3313 * Make sure we've room.
3314 */
3315 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3316 uint32_t const cFixups = pReNative->cFixups;
3317 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3318 { /* likely */ }
3319 else
3320 {
3321 uint32_t cNew = pReNative->cFixupsAlloc;
3322 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3323 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3324 cNew *= 2;
3325 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3326 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3327 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3328 pReNative->paFixups = paFixups;
3329 pReNative->cFixupsAlloc = cNew;
3330 }
3331
3332 /*
3333 * Add the fixup.
3334 */
3335 paFixups[cFixups].off = offWhere;
3336 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3337 paFixups[cFixups].enmType = enmType;
3338 paFixups[cFixups].offAddend = offAddend;
3339 pReNative->cFixups = cFixups + 1;
3340}
3341
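/*
 * Example of how the label/fixup helpers above work together (an illustrative
 * sketch only; the concrete label type, fixup type and addend below are
 * assumptions for the example, not taken from this file):
 *
 *      // Forward-declare a not-yet-defined target and emit a branch to it:
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else);
 *      ... emit the branch instruction at 'off' ...
 *      iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
 *
 *      // Later, once the branch target position is known:
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 */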
3342
3343/**
3344 * Slow code path for iemNativeInstrBufEnsure.
3345 */
3346DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3347{
3348 /* Double the buffer size till we meet the request. */
3349 uint32_t cNew = pReNative->cInstrBufAlloc;
3350 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3351 do
3352 cNew *= 2;
3353 while (cNew < off + cInstrReq);
3354
3355 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3356#ifdef RT_ARCH_ARM64
3357 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3358#else
3359 uint32_t const cbMaxInstrBuf = _2M;
3360#endif
3361 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3362
3363 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3364 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3365
3366#ifdef VBOX_STRICT
3367 pReNative->offInstrBufChecked = off + cInstrReq;
3368#endif
3369 pReNative->cInstrBufAlloc = cNew;
3370 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3371}
3372
3373#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3374
3375/**
3376 * Grows the static debug info array used during recompilation.
3377 *
3378 * @returns Pointer to the new debug info block; throws VBox status code on
3379 * failure, so no need to check the return value.
3380 */
3381DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3382{
3383 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3384 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3385 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3386 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3387 pReNative->pDbgInfo = pDbgInfo;
3388 pReNative->cDbgInfoAlloc = cNew;
3389 return pDbgInfo;
3390}
3391
3392
3393/**
3394 * Adds a new uninitialized debug info entry, returning a pointer to it.
3395 */
3396DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3397{
3398 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3399 { /* likely */ }
3400 else
3401 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3402 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3403}
3404
3405
3406/**
3407 * Debug Info: Adds a native offset record, if necessary.
3408 */
3409static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3410{
3411 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3412
3413 /*
3414 * Search backwards to see if we've got a similar record already.
3415 */
3416 uint32_t idx = pDbgInfo->cEntries;
3417 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3418 while (idx-- > idxStop)
3419 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3420 {
3421 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3422 return;
3423 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3424 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3425 break;
3426 }
3427
3428 /*
3429 * Add it.
3430 */
3431 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3432 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3433 pEntry->NativeOffset.offNative = off;
3434}
3435
3436
3437/**
3438 * Debug Info: Record info about a label.
3439 */
3440static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3441{
3442 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3443 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3444 pEntry->Label.uUnused = 0;
3445 pEntry->Label.enmLabel = (uint8_t)enmType;
3446 pEntry->Label.uData = uData;
3447}
3448
3449
3450/**
3451 * Debug Info: Record info about a threaded call.
3452 */
3453static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3454{
3455 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3456 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3457 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3458 pEntry->ThreadedCall.uUnused = 0;
3459 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3460}
3461
3462
3463/**
3464 * Debug Info: Record info about a new guest instruction.
3465 */
3466static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3467{
3468 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3469 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3470 pEntry->GuestInstruction.uUnused = 0;
3471 pEntry->GuestInstruction.fExec = fExec;
3472}
3473
3474
3475/**
3476 * Debug Info: Record info about guest register shadowing.
3477 */
3478static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3479 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3480{
3481 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3482 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3483 pEntry->GuestRegShadowing.uUnused = 0;
3484 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3485 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3486 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3487}
3488
3489
3490# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3491/**
3492 * Debug Info: Record info about guest register shadowing.
3493 */
3494static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3495 uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX)
3496{
3497 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3498 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3499 pEntry->GuestSimdRegShadowing.uUnused = 0;
3500 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3501 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3502 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3503}
3504# endif
3505
3506
3507# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3508/**
3509 * Debug Info: Record info about delayed RIP updates.
3510 */
3511static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3512{
3513 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3514 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3515 pEntry->DelayedPcUpdate.offPc = offPc;
3516 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3517}
3518# endif
3519
3520#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3521
3522
3523/*********************************************************************************************************************************
3524* Register Allocator *
3525*********************************************************************************************************************************/
3526
3527/**
3528 * Register parameter indexes (indexed by argument number).
3529 */
3530DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3531{
3532 IEMNATIVE_CALL_ARG0_GREG,
3533 IEMNATIVE_CALL_ARG1_GREG,
3534 IEMNATIVE_CALL_ARG2_GREG,
3535 IEMNATIVE_CALL_ARG3_GREG,
3536#if defined(IEMNATIVE_CALL_ARG4_GREG)
3537 IEMNATIVE_CALL_ARG4_GREG,
3538# if defined(IEMNATIVE_CALL_ARG5_GREG)
3539 IEMNATIVE_CALL_ARG5_GREG,
3540# if defined(IEMNATIVE_CALL_ARG6_GREG)
3541 IEMNATIVE_CALL_ARG6_GREG,
3542# if defined(IEMNATIVE_CALL_ARG7_GREG)
3543 IEMNATIVE_CALL_ARG7_GREG,
3544# endif
3545# endif
3546# endif
3547#endif
3548};
3549
3550/**
3551 * Call register masks indexed by argument count.
3552 */
3553DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3554{
3555 0,
3556 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3557 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3559 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3560 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3561#if defined(IEMNATIVE_CALL_ARG4_GREG)
3562 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3563 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3564# if defined(IEMNATIVE_CALL_ARG5_GREG)
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3567# if defined(IEMNATIVE_CALL_ARG6_GREG)
3568 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3569 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3570 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3571# if defined(IEMNATIVE_CALL_ARG7_GREG)
3572 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3573 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3574 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3575# endif
3576# endif
3577# endif
3578#endif
3579};
3580
3581#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3582/**
3583 * BP offset of the stack argument slots.
3584 *
3585 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3586 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3587 */
3588DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3589{
3590 IEMNATIVE_FP_OFF_STACK_ARG0,
3591# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3592 IEMNATIVE_FP_OFF_STACK_ARG1,
3593# endif
3594# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3595 IEMNATIVE_FP_OFF_STACK_ARG2,
3596# endif
3597# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3598 IEMNATIVE_FP_OFF_STACK_ARG3,
3599# endif
3600};
3601AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3602#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3603
3604/**
3605 * Info about shadowed guest register values.
3606 * @see IEMNATIVEGSTREG
3607 */
3608static struct
3609{
3610 /** Offset in VMCPU. */
3611 uint32_t off;
3612 /** The field size. */
3613 uint8_t cb;
3614 /** Name (for logging). */
3615 const char *pszName;
3616} const g_aGstShadowInfo[] =
3617{
3618#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3620 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3621 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3622 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3623 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3624 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3625 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3626 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3627 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3628 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3629 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3630 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3631 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3632 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3633 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3634 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3635 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3636 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3637 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3638 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3639 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3640 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3641 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3642 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3643 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3644 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3645 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3646 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3647 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3648 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3649 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3650 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3651 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3652 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3653 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3654 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3655 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3656 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3657 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3658 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3659 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3660 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3661 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3662 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3663 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3664 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3665 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3666 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3667#undef CPUMCTX_OFF_AND_SIZE
3668};
3669AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
3670
3671
3672/** Host CPU general purpose register names. */
3673DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3674{
3675#ifdef RT_ARCH_AMD64
3676 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3677#elif defined(RT_ARCH_ARM64)
3678 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3679 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3680#else
3681# error "port me"
3682#endif
3683};
3684
3685
3686DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3687 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3688{
3689 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3690
3691 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3692 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3693 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3694 return (uint8_t)idxReg;
3695}
3696
3697
3698#if 0 /* unused */
3699/**
3700 * Tries to locate a suitable register in the given register mask.
3701 *
3702 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3703 * failed.
3704 *
3705 * @returns Host register number on success, returns UINT8_MAX on failure.
3706 */
3707static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3708{
3709 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3710 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3711 if (fRegs)
3712 {
3713 /** @todo pick better here: */
3714 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3715
3716 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3717 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3718 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3719 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3720
3721 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3722 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3723 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3724 return idxReg;
3725 }
3726 return UINT8_MAX;
3727}
3728#endif /* unused */
3729
3730
3731/**
3732 * Locate a register, possibly freeing one up.
3733 *
3734 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3735 * failed.
3736 *
3737 * @returns Host register number on success. Returns UINT8_MAX if no registers
3738 *          were found; the caller is supposed to deal with this and raise an
3739 *          allocation type specific status code (if desired).
3740 *
3741 * @throws  VBox status code if we run into trouble spilling a variable or
3742 *          recording debug info.  Does NOT throw anything if we're out of
3743 * registers, though.
3744 */
3745static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3746 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3747{
3748 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3749 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3750 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3751
3752 /*
3753 * Try a freed register that's shadowing a guest register.
3754 */
3755 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3756 if (fRegs)
3757 {
3758 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3759
3760#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3761 /*
3762         * When we have liveness information, we use it to kick out all shadowed
3763         * guest registers that will not be needed any more in this TB.  If we're
3764 * lucky, this may prevent us from ending up here again.
3765 *
3766 * Note! We must consider the previous entry here so we don't free
3767 * anything that the current threaded function requires (current
3768 * entry is produced by the next threaded function).
3769 */
3770 uint32_t const idxCurCall = pReNative->idxCurCall;
3771 if (idxCurCall > 0)
3772 {
3773 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3774
3775# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3776 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3777 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3778 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
3779# else
3780 /* Construct a mask of the registers not in the read or write state.
3781                Note! We could skip writes, if they aren't from us, as this is just
3782 a hack to prevent trashing registers that have just been written
3783 or will be written when we retire the current instruction. */
3784 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3785 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3786 & IEMLIVENESSBIT_MASK;
3787# endif
3788            /* Merge EFLAGS: it is only freeable if all seven liveness sub-fields (Other, CF, PF, AF, ZF, SF, OF) say so. */
3789 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3790 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3791 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3792 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3793 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3794
3795 /* If it matches any shadowed registers. */
3796 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3797 {
3798 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3799 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3800 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3801
3802 /* See if we've got any unshadowed registers we can return now. */
3803 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3804 if (fUnshadowedRegs)
3805 {
3806 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3807 return (fPreferVolatile
3808 ? ASMBitFirstSetU32(fUnshadowedRegs)
3809 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3810 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3811 - 1;
3812 }
3813 }
3814 }
3815#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3816
3817 unsigned const idxReg = (fPreferVolatile
3818 ? ASMBitFirstSetU32(fRegs)
3819 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3820 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3821 - 1;
3822
3823 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3824 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3825 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3826 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3827
3828 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3829 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3830 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3831 return idxReg;
3832 }
3833
3834 /*
3835 * Try free up a variable that's in a register.
3836 *
3837     * We do two rounds here: first we evacuate variables that don't need to be
3838     * saved on the stack, then in the second round we move things to the stack.
3839 */
3840 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3841 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3842 {
3843 uint32_t fVars = pReNative->Core.bmVars;
3844 while (fVars)
3845 {
3846 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3847 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3848 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3849 && (RT_BIT_32(idxReg) & fRegMask)
3850 && ( iLoop == 0
3851 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3852 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3853 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3854 {
3855 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3856 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3857 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3858 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3859 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3860 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3861
3862 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3863 {
3864 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3865 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3866 }
3867
3868 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3869 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3870
3871 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3872 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3873 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3874 return idxReg;
3875 }
3876 fVars &= ~RT_BIT_32(idxVar);
3877 }
3878 }
3879
3880 return UINT8_MAX;
3881}
3882
3883
3884/**
3885 * Reassigns a variable to a different register specified by the caller.
3886 *
3887 * @returns The new code buffer position.
3888 * @param pReNative The native recompile state.
3889 * @param off The current code buffer position.
3890 * @param idxVar The variable index.
3891 * @param idxRegOld The old host register number.
3892 * @param idxRegNew The new host register number.
3893 * @param pszCaller The caller for logging.
3894 */
3895static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3896 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3897{
3898 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3899 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3900 RT_NOREF(pszCaller);
3901
3902 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3903
3904 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3905 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3906 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3908
3909 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3910 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3911 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3912 if (fGstRegShadows)
3913 {
3914 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3915 | RT_BIT_32(idxRegNew);
3916 while (fGstRegShadows)
3917 {
3918 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3919 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3920
3921 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3922 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3923 }
3924 }
3925
3926 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3927 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3928 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3929 return off;
3930}
3931
3932
3933/**
3934 * Moves a variable to a different register or spills it onto the stack.
3935 *
3936 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3937 * kinds can easily be recreated if needed later.
3938 *
3939 * @returns The new code buffer position.
3940 * @param pReNative The native recompile state.
3941 * @param off The current code buffer position.
3942 * @param idxVar The variable index.
3943 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3944 * call-volatile registers.
3945 */
3946static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3947 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3948{
3949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3950 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3951 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3952 Assert(!pVar->fRegAcquired);
3953
3954 uint8_t const idxRegOld = pVar->idxReg;
3955 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3956 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3957 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3958 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3959 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3960 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3961 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3962 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3963
3964
3965 /** @todo Add statistics on this.*/
3966 /** @todo Implement basic variable liveness analysis (python) so variables
3967     * can be freed immediately once no longer used.  Without it we risk trashing
3968     * registers and stack slots on dead variables.
3969 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3970
3971 /*
3972 * First try move it to a different register, as that's cheaper.
3973 */
3974 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3975 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3976 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3977 if (fRegs)
3978 {
3979 /* Avoid using shadow registers, if possible. */
3980 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3981 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3982 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3983 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3984 }
3985
3986 /*
3987 * Otherwise we must spill the register onto the stack.
3988 */
3989 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3990 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3991 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3992 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3993
3994 pVar->idxReg = UINT8_MAX;
3995 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3996 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3997 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3998 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3999 return off;
4000}
4001
4002
4003/**
4004 * Allocates a temporary host general purpose register.
4005 *
4006 * This may emit code to save register content onto the stack in order to free
4007 * up a register.
4008 *
4009 * @returns The host register number; throws VBox status code on failure,
4010 * so no need to check the return value.
4011 * @param pReNative The native recompile state.
4012 * @param poff Pointer to the variable with the code buffer position.
4013 *                      This will be updated if we need to move a variable from
4014 * register to stack in order to satisfy the request.
4015 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4016 * registers (@c true, default) or the other way around
4017 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4018 */
4019DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4020{
4021 /*
4022 * Try find a completely unused register, preferably a call-volatile one.
4023 */
4024 uint8_t idxReg;
4025 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4026 & ~pReNative->Core.bmHstRegsWithGstShadow
4027 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4028 if (fRegs)
4029 {
4030 if (fPreferVolatile)
4031 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4032 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4033 else
4034 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4035 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4036 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4037 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4038 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4039 }
4040 else
4041 {
4042 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4043 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4044 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4045 }
4046 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4047}
4048
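/*
 * Illustrative use of iemNativeRegAllocTmp (sketch only; iemNativeRegFreeTmp is
 * assumed to be the matching release helper defined elsewhere in this file):
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, uSomeValue);
 *      ... use idxTmpReg as scratch ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */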
4049
4050/**
4051 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
4052 * registers.
4053 *
4054 * @returns The host register number; throws VBox status code on failure,
4055 * so no need to check the return value.
4056 * @param pReNative The native recompile state.
4057 * @param poff Pointer to the variable with the code buffer position.
4058 *                      This will be updated if we need to move a variable from
4059 * register to stack in order to satisfy the request.
4060 * @param fRegMask Mask of acceptable registers.
4061 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4062 * registers (@c true, default) or the other way around
4063 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4064 */
4065DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4066 bool fPreferVolatile /*= true*/)
4067{
4068 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4069 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4070
4071 /*
4072 * Try find a completely unused register, preferably a call-volatile one.
4073 */
4074 uint8_t idxReg;
4075 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4076 & ~pReNative->Core.bmHstRegsWithGstShadow
4077 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4078 & fRegMask;
4079 if (fRegs)
4080 {
4081 if (fPreferVolatile)
4082 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4083 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4084 else
4085 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4086 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4087 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4088 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4089 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4090 }
4091 else
4092 {
4093 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4094 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4095 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4096 }
4097 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4098}
4099
4100
4101/**
4102 * Allocates a temporary register for loading an immediate value into.
4103 *
4104 * This will emit code to load the immediate, unless there happens to be an
4105 * unused register with the value already loaded.
4106 *
4107 * The caller must not modify the returned register; it must be considered
4108 * read-only. Free using iemNativeRegFreeTmpImm.
4109 *
4110 * @returns The host register number; throws VBox status code on failure, so no
4111 * need to check the return value.
4112 * @param pReNative The native recompile state.
4113 * @param poff Pointer to the variable with the code buffer position.
4114 * @param uImm The immediate value that the register must hold upon
4115 * return.
4116 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4117 * registers (@c true, default) or the other way around
4118 * (@c false).
4119 *
4120 * @note Reusing immediate values has not been implemented yet.
4121 */
4122DECL_HIDDEN_THROW(uint8_t)
4123iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4124{
4125 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4126 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4127 return idxReg;
4128}
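
/*
 * Illustrative use of iemNativeRegAllocTmpImm (sketch only):
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      ... use idxRegImm strictly as a read-only source operand ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */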
4129
4130#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4131
4132# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4133/**
4134 * Helper for iemNativeLivenessGetStateByGstReg.
4135 *
4136 * @returns IEMLIVENESS_STATE_XXX
4137 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4138 * ORed together.
4139 */
4140DECL_FORCE_INLINE(uint32_t)
4141iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4142{
4143 /* INPUT trumps anything else. */
4144 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4145 return IEMLIVENESS_STATE_INPUT;
4146
4147 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4148 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4149 {
4150 /* If not all sub-fields are clobbered they must be considered INPUT. */
4151 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4152 return IEMLIVENESS_STATE_INPUT;
4153 return IEMLIVENESS_STATE_CLOBBERED;
4154 }
4155
4156 /* XCPT_OR_CALL trumps UNUSED. */
4157 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4158 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4159
4160 return IEMLIVENESS_STATE_UNUSED;
4161}
4162# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4163
4164
4165DECL_FORCE_INLINE(uint32_t)
4166iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4167{
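    /* Gather this register's bit from each liveness bitmap (two bitmaps in the
       compact layout, four in the extended one) and pack them into a single
       IEMLIVENESS_STATE_XXX style value. */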
4168# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4169 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4170 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4171# else
4172 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4173 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4174 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4175 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4176# endif
4177}
4178
4179
4180DECL_FORCE_INLINE(uint32_t)
4181iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4182{
4183 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4184 if (enmGstReg == kIemNativeGstReg_EFlags)
4185 {
4186 /* Merge the eflags states to one. */
4187# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4188 uRet = RT_BIT_32(uRet);
4189 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4190 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4191 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4192 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4193 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4194 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4195 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4196# else
4197 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4198 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4199 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4200 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4201 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4202 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4203 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4204# endif
4205 }
4206 return uRet;
4207}
4208
4209
4210# ifdef VBOX_STRICT
4211/** For assertions only; the caller must ensure idxCurCall isn't zero. */
4212DECL_FORCE_INLINE(uint32_t)
4213iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4214{
4215 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4216}
4217# endif /* VBOX_STRICT */
4218
4219#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4220
4221/**
4222 * Marks host register @a idxHstReg as containing a shadow copy of guest
4223 * register @a enmGstReg.
4224 *
4225 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4226 * host register before calling.
4227 */
4228DECL_FORCE_INLINE(void)
4229iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4230{
4231 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4232 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4233 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4234
4235 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4236 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4237 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4238 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4239#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4240 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4241 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4242#else
4243 RT_NOREF(off);
4244#endif
4245}
4246
4247
4248/**
4249 * Clear any guest register shadow claims from @a idxHstReg.
4250 *
4251 * The register does not need to be shadowing any guest registers.
4252 */
4253DECL_FORCE_INLINE(void)
4254iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4255{
4256 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4257 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4258 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4259 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4260 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4261
4262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4263 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4264 if (fGstRegs)
4265 {
4266 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4267 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4268 while (fGstRegs)
4269 {
4270 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4271 fGstRegs &= ~RT_BIT_64(iGstReg);
4272 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4273 }
4274 }
4275#else
4276 RT_NOREF(off);
4277#endif
4278
4279 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4281 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4282}
4283
4284
4285/**
4286 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4287 * and global overview flags.
4288 */
4289DECL_FORCE_INLINE(void)
4290iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4291{
4292 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4293 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4294 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4295 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4296 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4297 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4298 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4299
4300#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4301 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4302 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4303#else
4304 RT_NOREF(off);
4305#endif
4306
4307 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4308 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4309 if (!fGstRegShadowsNew)
4310 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4311 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4312}
4313
4314
4315#if 0 /* unused */
4316/**
4317 * Clear any guest register shadow claim for @a enmGstReg.
4318 */
4319DECL_FORCE_INLINE(void)
4320iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4321{
4322 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4323 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4324 {
4325 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4326 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4327 }
4328}
4329#endif
4330
4331
4332/**
4333 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4334 * as the new shadow of it.
4335 *
4336 * Unlike the other guest reg shadow helpers, this does the logging for you.
4337 * However, the liveness state is not asserted here; the caller must do
4338 * that.
4339 */
4340DECL_FORCE_INLINE(void)
4341iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4342 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4343{
4344 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4345 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4346 {
4347 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4348 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4349 if (idxHstRegOld == idxHstRegNew)
4350 return;
4351 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4352 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4353 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4354 }
4355 else
4356 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4357 g_aGstShadowInfo[enmGstReg].pszName));
4358 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4359}
4360
4361
4362/**
4363 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4364 * to @a idxRegTo.
4365 */
4366DECL_FORCE_INLINE(void)
4367iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4368 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4369{
4370 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4371 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4372 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4373 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4374 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4375 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4376 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4377 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4378 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4379
4380 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4381 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4382 if (!fGstRegShadowsFrom)
4383 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4384 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4385 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4386 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4387#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4388 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4389 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4390#else
4391 RT_NOREF(off);
4392#endif
4393}
4394
4395
4396/**
4397 * Allocates a temporary host general purpose register for keeping a guest
4398 * register value.
4399 *
4400 * Since we may already have a register holding the guest register value,
4401 * code will be emitted to do the loading if that's not the case. Code may also
4402 * be emitted if we have to free up a register to satisfy the request.
4403 *
4404 * @returns The host register number; throws VBox status code on failure, so no
4405 * need to check the return value.
4406 * @param pReNative The native recompile state.
4407 * @param poff Pointer to the variable with the code buffer
4408 * position. This will be updated if we need to move a
4409 * variable from register to stack in order to satisfy
4410 * the request.
4411 * @param enmGstReg The guest register that is to be updated.
4412 * @param enmIntendedUse How the caller will be using the host register.
4413 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4414 * register is okay (default). The ASSUMPTION here is
4415 * that the caller has already flushed all volatile
4416 * registers, so this is only applied if we allocate a
4417 * new register.
4418 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4419 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4420 */
4421DECL_HIDDEN_THROW(uint8_t)
4422iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4423 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4424 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4425{
4426 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4427#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4428 AssertMsg( fSkipLivenessAssert
4429 || pReNative->idxCurCall == 0
4430 || enmGstReg == kIemNativeGstReg_Pc
4431 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4432 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4433 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4434 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4435 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4436 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4437#endif
4438 RT_NOREF(fSkipLivenessAssert);
4439#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4440 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4441#endif
4442 uint32_t const fRegMask = !fNoVolatileRegs
4443 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4444 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4445
4446 /*
4447 * First check if the guest register value is already in a host register.
4448 */
4449 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4450 {
4451 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4452 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4453 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4454 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4455
4456 /* It's not supposed to be allocated... */
4457 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4458 {
4459 /*
4460 * If the register will trash the guest shadow copy, try find a
4461 * completely unused register we can use instead. If that fails,
4462 * we need to disassociate the host reg from the guest reg.
4463 */
4464 /** @todo would be nice to know if preserving the register is in any way helpful. */
4465 /* If the purpose is calculations, try duplicate the register value as
4466 we'll be clobbering the shadow. */
4467 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4468 && ( ~pReNative->Core.bmHstRegs
4469 & ~pReNative->Core.bmHstRegsWithGstShadow
4470 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4471 {
4472 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4473
4474 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4475
4476 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4477 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4478 g_apszIemNativeHstRegNames[idxRegNew]));
4479 idxReg = idxRegNew;
4480 }
4481 /* If the current register matches the restrictions, go ahead and allocate
4482 it for the caller. */
4483 else if (fRegMask & RT_BIT_32(idxReg))
4484 {
4485 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4486 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4487 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4488 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4489 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4490 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4491 else
4492 {
4493 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4494 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4495 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4496 }
4497 }
4498 /* Otherwise, allocate a register that satisfies the caller and transfer
4499 the shadowing if compatible with the intended use. (This basically
4500 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4501 else
4502 {
4503 Assert(fNoVolatileRegs);
4504 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4505 !fNoVolatileRegs
4506 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4507 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4508 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4509 {
4510 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4511 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4512 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4513 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4514 }
4515 else
4516 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4517 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4518 g_apszIemNativeHstRegNames[idxRegNew]));
4519 idxReg = idxRegNew;
4520 }
4521 }
4522 else
4523 {
4524 /*
4525 * Oops. Shadowed guest register already allocated!
4526 *
4527 * Allocate a new register, copy the value and, if updating, the
4528 * guest shadow copy assignment to the new register.
4529 */
4530 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4531 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4532 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4533 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4534
4535 /** @todo share register for readonly access. */
4536 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4537 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4538
4539 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4540 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4541
4542 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4543 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4544 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4545 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4546 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4547 else
4548 {
4549 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4550 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4551 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4552 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4553 }
4554 idxReg = idxRegNew;
4555 }
4556 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4557
4558#ifdef VBOX_STRICT
4559 /* Strict builds: Check that the value is correct. */
4560 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4561#endif
4562
4563 return idxReg;
4564 }
4565
4566 /*
4567 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4568 */
4569 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4570
4571 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4572 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4573
4574 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4575 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4576 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4577 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4578
4579 return idxRegNew;
4580}
4581
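/*
 * The allocator above boils down to a small decision tree.  A hedged,
 * standalone sketch of that flow follows; the enum and function names are
 * hypothetical, and the real code additionally deals with register masks,
 * logging, liveness asserts and strict-build value checks.
 */
#if 0 /* standalone sketch, not compiled with this file */
typedef enum AllocAction
{
    kAction_ReuseShadow,      /* guest value already in an unallocated host reg: just claim it */
    kAction_DuplicateForCalc, /* destructive use: copy the value to a fresh reg, keep the shadow intact */
    kAction_CopyOrMoveShadow, /* shadowing reg busy/unsuitable: allocate a new reg, copy, maybe transfer the shadow */
    kAction_LoadFresh         /* no shadow at all: allocate a reg and (unless full write) load from the guest context */
} AllocAction;

static AllocAction pickAllocAction(bool fHasShadow, bool fShadowRegFree,
                                   bool fShadowRegAllowedByMask, bool fDestructiveCalc)
{
    if (!fHasShadow)
        return kAction_LoadFresh;
    if (fShadowRegFree)
    {
        if (fDestructiveCalc)
            return kAction_DuplicateForCalc;  /* when a spare register exists; otherwise reuse + unshadow */
        if (fShadowRegAllowedByMask)
            return kAction_ReuseShadow;
        return kAction_CopyOrMoveShadow;      /* e.g. the caller demanded a non-volatile register */
    }
    return kAction_CopyOrMoveShadow;          /* the shadowing register is already allocated to something else */
}
#endif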
4582
4583/**
4584 * Allocates a temporary host general purpose register that already holds the
4585 * given guest register value.
4586 *
4587 * The use case for this function is code paths where the shadowing state cannot be
4588 * modified due to branching and such. This will fail if we don't have a
4589 * current shadow copy handy or if it's incompatible. The only code that will
4590 * be emitted here is value checking code in strict builds.
4591 *
4592 * The intended use can only be readonly!
4593 *
4594 * @returns The host register number, UINT8_MAX if not present.
4595 * @param pReNative The native recompile state.
4596 * @param poff Pointer to the instruction buffer offset.
4597 * Will be updated in strict builds if a register is
4598 * found.
4599 * @param enmGstReg The guest register that is to be accessed.
4600 * @note In strict builds, this may throw instruction buffer growth failures.
4601 * Non-strict builds will not throw anything.
4602 * @sa iemNativeRegAllocTmpForGuestReg
4603 */
4604DECL_HIDDEN_THROW(uint8_t)
4605iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4606{
4607 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4608#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4609 AssertMsg( pReNative->idxCurCall == 0
4610 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4611 || enmGstReg == kIemNativeGstReg_Pc,
4612 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4613#endif
4614
4615 /*
4616 * First check if the guest register value is already in a host register.
4617 */
4618 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4619 {
4620 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4621 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4622 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4623 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4624
4625 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4626 {
4627 /*
4628 * We only do readonly use here, so easy compared to the other
4629 * variant of this code.
4630 */
4631 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4632 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4633 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4634 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4635 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4636
4637#ifdef VBOX_STRICT
4638 /* Strict builds: Check that the value is correct. */
4639 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4640#else
4641 RT_NOREF(poff);
4642#endif
4643 return idxReg;
4644 }
4645 }
4646
4647 return UINT8_MAX;
4648}
4649
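/*
 * Sketch of the intended caller pattern for the function above: no loads are
 * ever emitted, so a UINT8_MAX return simply means the caller has to take a
 * slower path.  This is a hypothetical caller-side fragment, not code from
 * this file.
 */
#if 0 /* illustrative caller fragment, not compiled with this file */
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* Read-only use of the existing shadow copy; no load was emitted. */
        /* ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    else
    {
        /* No usable shadow copy; fall back to a variant that may emit a load. */
        /* ... */
    }
#endif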
4650
4651/**
4652 * Allocates argument registers for a function call.
4653 *
4654 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4655 * need to check the return value.
4656 * @param pReNative The native recompile state.
4657 * @param off The current code buffer offset.
4658 * @param cArgs The number of arguments the function call takes.
4659 */
4660DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4661{
4662 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4663 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4664 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4665 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4666
4667 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4668 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4669 else if (cArgs == 0)
4670 return true;
4671
4672 /*
4673 * Do we get lucky and find all the registers free and not shadowing anything?
4674 */
4675 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4676 for (uint32_t i = 0; i < cArgs; i++)
4677 {
4678 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4679 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4680 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4681 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4682 }
4683 /*
4684 * Okay, not lucky so we have to free up the registers.
4685 */
4686 else
4687 for (uint32_t i = 0; i < cArgs; i++)
4688 {
4689 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4690 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4691 {
4692 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4693 {
4694 case kIemNativeWhat_Var:
4695 {
4696 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4698 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4699 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4700 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4701
4702 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4703 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4704 else
4705 {
4706 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4707 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4708 }
4709 break;
4710 }
4711
4712 case kIemNativeWhat_Tmp:
4713 case kIemNativeWhat_Arg:
4714 case kIemNativeWhat_rc:
4715 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4716 default:
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4718 }
4719
4720 }
4721 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4722 {
4723 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4724 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4725 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4726 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4727 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4728 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4729 }
4730 else
4731 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4732 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4733 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4734 }
4735 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4736 return true;
4737}
4738
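/*
 * The "are we lucky" test above relies on a table of cumulative argument
 * register masks indexed by argument count (g_afIemNativeCallRegs).  Below is
 * a standalone sketch of that idea, assuming for illustration an AMD64
 * SysV-style RDI/RSI/RDX/RCX ordering; the table, names and values here are
 * hypothetical.
 */
#if 0 /* standalone sketch, not compiled with this file */
# include <stdint.h>

enum { kRegCX = 1, kRegDX = 2, kRegSI = 6, kRegDI = 7 }; /* x86 register encodings */

/* s_afCallArgRegs[n] = mask of the host registers used by the first n call arguments. */
static uint32_t const s_afCallArgRegs[4 + 1] =
{
    0,
    (1u << kRegDI),
    (1u << kRegDI) | (1u << kRegSI),
    (1u << kRegDI) | (1u << kRegSI) | (1u << kRegDX),
    (1u << kRegDI) | (1u << kRegSI) | (1u << kRegDX) | (1u << kRegCX),
};

/* True if none of the first cArgs argument registers is allocated or holds a guest shadow. */
static bool argRegsAreFree(uint32_t bmAllocated, uint32_t bmWithShadow, unsigned cArgs)
{
    return ((bmAllocated | bmWithShadow) & s_afCallArgRegs[cArgs]) == 0;
}
#endif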
4739
4740DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4741
4742
4743#if 0
4744/**
4745 * Frees a register assignment of any type.
4746 *
4747 * @param pReNative The native recompile state.
4748 * @param idxHstReg The register to free.
4749 *
4750 * @note Does not update variables.
4751 */
4752DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4753{
4754 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4755 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4756 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4757 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4758 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4759 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4760 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4761 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4762 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4763 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4764 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4765 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4766 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4767 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4768
4769 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4770 /* no flushing, right:
4771 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4772 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4773 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4774 */
4775}
4776#endif
4777
4778
4779/**
4780 * Frees a temporary register.
4781 *
4782 * Any shadow copies of guest registers assigned to the host register will not
4783 * be flushed by this operation.
4784 */
4785DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4786{
4787 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4788 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4789 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4790 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4791 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4792}
4793
4794
4795/**
4796 * Frees a temporary immediate register.
4797 *
4798 * It is assumed that the caller has not modified the register, so it still holds
4799 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4800 */
4801DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4802{
4803 iemNativeRegFreeTmp(pReNative, idxHstReg);
4804}
4805
4806
4807/**
4808 * Frees a register assigned to a variable.
4809 *
4810 * The register will be disassociated from the variable.
4811 */
4812DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4813{
4814 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4815 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4816 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4817 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4818 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4819
4820 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4821 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4822 if (!fFlushShadows)
4823 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4824 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4825 else
4826 {
4827 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4828 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4829 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4830 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4831 uint64_t fGstRegShadows = fGstRegShadowsOld;
4832 while (fGstRegShadows)
4833 {
4834 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4835 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4836
4837 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4838 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4839 }
4840 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4841 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4842 }
4843}
4844
4845
4846/**
4847 * Called right before emitting a call instruction to move anything important
4848 * out of call-volatile registers, free and flush the call-volatile registers,
4849 * optionally freeing argument variables.
4850 *
4851 * @returns New code buffer offset, UINT32_MAX on failure.
4852 * @param pReNative The native recompile state.
4853 * @param off The code buffer offset.
4854 * @param cArgs The number of arguments the function call takes.
4855 * It is presumed that the host register part of these has
4856 * been allocated as such already and won't need moving,
4857 * just freeing.
4858 * @param fKeepVars Mask of variables that should keep their register
4859 * assignments. Caller must take care to handle these.
4860 */
4861DECL_HIDDEN_THROW(uint32_t)
4862iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4863{
4864 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4865
4866 /* fKeepVars will reduce this mask. */
4867 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4868
4869 /*
4870 * Move anything important out of volatile registers.
4871 */
4872 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4873 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4874 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4875#ifdef IEMNATIVE_REG_FIXED_TMP0
4876 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4877#endif
4878#ifdef IEMNATIVE_REG_FIXED_TMP1
4879 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4880#endif
4881#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4882 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4883#endif
4884 & ~g_afIemNativeCallRegs[cArgs];
4885
4886 fRegsToMove &= pReNative->Core.bmHstRegs;
4887 if (!fRegsToMove)
4888 { /* likely */ }
4889 else
4890 {
4891 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4892 while (fRegsToMove != 0)
4893 {
4894 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4895 fRegsToMove &= ~RT_BIT_32(idxReg);
4896
4897 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4898 {
4899 case kIemNativeWhat_Var:
4900 {
4901 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4902 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4903 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4904 Assert(pVar->idxReg == idxReg);
4905 if (!(RT_BIT_32(idxVar) & fKeepVars))
4906 {
4907 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4908 idxVar, pVar->enmKind, pVar->idxReg));
4909 if (pVar->enmKind != kIemNativeVarKind_Stack)
4910 pVar->idxReg = UINT8_MAX;
4911 else
4912 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4913 }
4914 else
4915 fRegsToFree &= ~RT_BIT_32(idxReg);
4916 continue;
4917 }
4918
4919 case kIemNativeWhat_Arg:
4920 AssertMsgFailed(("What?!?: %u\n", idxReg));
4921 continue;
4922
4923 case kIemNativeWhat_rc:
4924 case kIemNativeWhat_Tmp:
4925 AssertMsgFailed(("Missing free: %u\n", idxReg));
4926 continue;
4927
4928 case kIemNativeWhat_FixedTmp:
4929 case kIemNativeWhat_pVCpuFixed:
4930 case kIemNativeWhat_pCtxFixed:
4931 case kIemNativeWhat_PcShadow:
4932 case kIemNativeWhat_FixedReserved:
4933 case kIemNativeWhat_Invalid:
4934 case kIemNativeWhat_End:
4935 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4936 }
4937 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4938 }
4939 }
4940
4941 /*
4942 * Do the actual freeing.
4943 */
4944 if (pReNative->Core.bmHstRegs & fRegsToFree)
4945 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4946 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4947 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4948
4949 /* If there are guest register shadows in any call-volatile register, we
4950 have to clear the corresponding guest register masks for each register. */
4951 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4952 if (fHstRegsWithGstShadow)
4953 {
4954 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4955 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4956 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4957 do
4958 {
4959 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4960 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4961
4962 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4963 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4964 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4965 } while (fHstRegsWithGstShadow != 0);
4966 }
4967
4968 return off;
4969}
4970
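/*
 * Both the move loop and the shadow cleanup above use the same idiom: peel one
 * set bit at a time off a host register mask.  A standalone sketch of the
 * idiom follows; findFirstSetBit mirrors the 1-based ASMBitFirstSetU32
 * convention, everything else is hypothetical.
 */
#if 0 /* standalone sketch, not compiled with this file */
# include <stdint.h>
# include <stdio.h>

/* 1-based index of the least significant set bit, 0 if none. */
static unsigned findFirstSetBit(uint32_t fMask)
{
    for (unsigned iBit = 0; iBit < 32; iBit++)
        if (fMask & (1u << iBit))
            return iBit + 1;
    return 0;
}

static void forEachRegInMask(uint32_t fRegs)
{
    while (fRegs != 0)
    {
        unsigned const idxReg = findFirstSetBit(fRegs) - 1;
        fRegs &= ~(1u << idxReg);           /* clear the bit so the loop terminates */
        printf("processing host register %u\n", idxReg);
    }
}
#endif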
4971
4972/**
4973 * Flushes a set of guest register shadow copies.
4974 *
4975 * This is usually done after calling a threaded function or a C-implementation
4976 * of an instruction.
4977 *
4978 * @param pReNative The native recompile state.
4979 * @param fGstRegs Set of guest registers to flush.
4980 */
4981DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4982{
4983 /*
4984 * Reduce the mask by what's currently shadowed
4985 */
4986 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4987 fGstRegs &= bmGstRegShadowsOld;
4988 if (fGstRegs)
4989 {
4990 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4991 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4992 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4993 if (bmGstRegShadowsNew)
4994 {
4995 /*
4996 * Partial.
4997 */
4998 do
4999 {
5000 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5001 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5002 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5003 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5004 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5005
5006 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5007 fGstRegs &= ~fInThisHstReg;
5008 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5010 if (!fGstRegShadowsNew)
5011 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5012 } while (fGstRegs != 0);
5013 }
5014 else
5015 {
5016 /*
5017 * Clear all.
5018 */
5019 do
5020 {
5021 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5022 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5023 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5024 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5025 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5026
5027 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5028 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5029 } while (fGstRegs != 0);
5030 pReNative->Core.bmHstRegsWithGstShadow = 0;
5031 }
5032 }
5033}
5034
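/*
 * One non-obvious detail in the partial flush above: when a guest register is
 * flushed, every other to-be-flushed guest register shadowed by the *same*
 * host register is retired in the same iteration (fInThisHstReg), so the loop
 * runs at most once per host register.  Standalone sketch with hypothetical
 * names:
 */
#if 0 /* standalone sketch, not compiled with this file */
# include <stdint.h>

static unsigned firstSetBit64(uint64_t f) /* caller guarantees f != 0 */
{
    unsigned i = 0;
    while (!(f & 1))
    {
        f >>= 1;
        i++;
    }
    return i;
}

static void flushGuestShadows(uint64_t *pbmGstShadows, uint32_t *pbmHstWithShadow,
                              uint8_t const aidxGstToHst[], uint64_t afHstGstMask[], uint64_t fGstRegs)
{
    fGstRegs       &= *pbmGstShadows;       /* only flush what is actually shadowed */
    *pbmGstShadows &= ~fGstRegs;
    while (fGstRegs)
    {
        unsigned const iGstReg = firstSetBit64(fGstRegs);
        uint8_t  const idxHst  = aidxGstToHst[iGstReg];

        /* Retire all requested guest registers living in this host register in one go. */
        uint64_t const fInThisHstReg = (afHstGstMask[idxHst] & fGstRegs) | ((uint64_t)1 << iGstReg);
        fGstRegs             &= ~fInThisHstReg;
        afHstGstMask[idxHst] &= ~fInThisHstReg;
        if (!afHstGstMask[idxHst])          /* lost its last shadow */
            *pbmHstWithShadow &= ~(1u << idxHst);
    }
}
#endif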
5035
5036/**
5037 * Flushes guest register shadow copies held by a set of host registers.
5038 *
5039 * This is used with the TLB lookup code for ensuring that we don't carry on
5040 * with any guest shadows in volatile registers, as these will get corrupted by
5041 * a TLB miss.
5042 *
5043 * @param pReNative The native recompile state.
5044 * @param fHstRegs Set of host registers to flush guest shadows for.
5045 */
5046DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5047{
5048 /*
5049 * Reduce the mask by what's currently shadowed.
5050 */
5051 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5052 fHstRegs &= bmHstRegsWithGstShadowOld;
5053 if (fHstRegs)
5054 {
5055 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5056 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5057 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5058 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5059 if (bmHstRegsWithGstShadowNew)
5060 {
5061 /*
5062 * Partial (likely).
5063 */
5064 uint64_t fGstShadows = 0;
5065 do
5066 {
5067 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5068 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5069 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5070 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5071
5072 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5073 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5074 fHstRegs &= ~RT_BIT_32(idxHstReg);
5075 } while (fHstRegs != 0);
5076 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5077 }
5078 else
5079 {
5080 /*
5081 * Clear all.
5082 */
5083 do
5084 {
5085 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5086 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5087 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5088 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5089
5090 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5091 fHstRegs &= ~RT_BIT_32(idxHstReg);
5092 } while (fHstRegs != 0);
5093 pReNative->Core.bmGstRegShadows = 0;
5094 }
5095 }
5096}
5097
5098
5099/**
5100 * Restores guest shadow copies in volatile registers.
5101 *
5102 * This is used after calling a helper function (think TLB miss) to restore the
5103 * register state of volatile registers.
5104 *
5105 * @param pReNative The native recompile state.
5106 * @param off The code buffer offset.
5107 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5108 * be active (allocated) w/o asserting. Hack.
5109 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5110 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5111 */
5112DECL_HIDDEN_THROW(uint32_t)
5113iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5114{
5115 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5116 if (fHstRegs)
5117 {
5118 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5119 do
5120 {
5121 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5122
5123 /* It's not fatal if a register is active holding a variable that is
5124 shadowing a guest register, ASSUMING all pending guest register
5125 writes were flushed prior to the helper call. However, we'll be
5126 emitting duplicate restores, so it wastes code space. */
5127 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5128 RT_NOREF(fHstRegsActiveShadows);
5129
5130 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5131 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5132 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5133 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5134
5135 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5136 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5137
5138 fHstRegs &= ~RT_BIT_32(idxHstReg);
5139 } while (fHstRegs != 0);
5140 }
5141 return off;
5142}
5143
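/*
 * The function above is the second half of a pattern used around helper calls
 * (e.g. TLB misses): flush the guest shadows held in call-volatile host
 * registers before the call, then reload each such host register from the
 * guest context afterwards.  A standalone model of the reload half follows;
 * the names, the volatile mask and the plain arrays standing in for CPUMCTX
 * are hypothetical.
 */
#if 0 /* standalone sketch, not compiled with this file */
# include <stdint.h>

# define VOLATILE_MASK 0x0fu    /* pretend host regs 0..3 are call-volatile */

static void restoreVolatileShadows(uint64_t aHstRegValues[], uint64_t const aGstCtx[],
                                   uint64_t const afHstGstMask[], uint32_t bmHstWithShadow)
{
    uint32_t fHstRegs = bmHstWithShadow & VOLATILE_MASK;
    while (fHstRegs)
    {
        unsigned idxHst = 0;
        while (!(fHstRegs & (1u << idxHst)))
            idxHst++;
        fHstRegs &= ~(1u << idxHst);

        /* Reload the host register from the (first) guest register it shadows;
           this models the load instruction the recompiler emits. */
        uint64_t const fGst = afHstGstMask[idxHst];     /* non-zero by invariant */
        unsigned iGstReg = 0;
        while (!(fGst & ((uint64_t)1 << iGstReg)))
            iGstReg++;
        aHstRegValues[idxHst] = aGstCtx[iGstReg];
    }
}
#endif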
5144
5145#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5146# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5147static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5148{
5149 /* Compare the shadow with the context value, they should match. */
5150 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5151 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5152 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5153 return off;
5154}
5155# endif
5156
5157/**
5158 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5159 */
5160static uint32_t
5161iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5162{
5163 if (pReNative->Core.offPc)
5164 {
5165# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5166 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5167 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5168# endif
5169
5170# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5171 /* Allocate a temporary PC register. */
5172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5173
5174 /* Perform the addition and store the result. */
5175 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5176 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5177
5178 /* Free but don't flush the PC register. */
5179 iemNativeRegFreeTmp(pReNative, idxPcReg);
5180# else
5181 /* Compare the shadow with the context value, they should match. */
5182 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5183 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5184# endif
5185
5186 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5187 pReNative->Core.offPc = 0;
5188 pReNative->Core.cInstrPcUpdateSkipped = 0;
5189 }
5190# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5191 else
5192 {
5193 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5194 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5195 }
5196# endif
5197
5198 return off;
5199}
5200#endif
5201
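/*
 * Standalone model of the delayed PC update idea implemented above: instead of
 * storing the guest RIP after every recompiled instruction, only a byte offset
 * (offPc) and a skipped-update counter are tracked, and a single add + store
 * is emitted when the up-to-date value is actually needed.  Names are
 * hypothetical, plain C stands in for the emitted code.
 */
#if 0 /* standalone sketch, not compiled with this file */
# include <stdint.h>

typedef struct PcState
{
    uint64_t rip;               /* the architectural value, models cpum.GstCtx.rip */
    uint64_t offPc;             /* bytes advanced since rip was last written back */
    uint32_t cInstrSkipped;     /* number of per-instruction updates that were skipped */
} PcState;

/* Called once per recompiled instruction instead of writing rip immediately. */
static void pcAdvance(PcState *pPc, uint8_t cbInstr)
{
    pPc->offPc += cbInstr;
    pPc->cInstrSkipped++;
}

/* Called when the up-to-date rip is required (branches, TB exits, helper calls). */
static void pcWriteback(PcState *pPc)
{
    if (pPc->offPc)
    {
        pPc->rip          += pPc->offPc;    /* models the emitted add + store to the guest context */
        pPc->offPc         = 0;
        pPc->cInstrSkipped = 0;
    }
}
#endif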
5202
5203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5204
5205
5206/*********************************************************************************************************************************
5207* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5208*********************************************************************************************************************************/
5209
5210/**
5211 * Info about shadowed guest SIMD register values.
5212 * @see IEMNATIVEGSTSIMDREG
5213 */
5214static struct
5215{
5216 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5217 uint32_t offXmm;
5218 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5219 uint32_t offYmm;
5220 /** Name (for logging). */
5221 const char *pszName;
5222} const g_aGstSimdShadowInfo[] =
5223{
5224#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5225 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5226 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5227 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5228 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5229 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5230 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5231 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5232 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5233 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5234 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5235 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5236 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5237 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5238 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5239 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5240 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5241 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5242#undef CPUMCTX_OFF_AND_SIZE
5243};
5244AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5245
5246
5247#ifdef LOG_ENABLED
5248/** Host CPU SIMD register names. */
5249DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5250{
5251#ifdef RT_ARCH_AMD64
5252 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5253#elif defined(RT_ARCH_ARM64)
5254 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5255 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5256#else
5257# error "port me"
5258#endif
5259};
5260#endif
5261
5262
5263DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
5264 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
5265{
5266 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5267
5268 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = enmWhat;
5269 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5270 RT_NOREF(idxVar);
5271 return idxSimdReg;
5272}
5273
5274
5275/**
5276 * Frees a temporary SIMD register.
5277 *
5278 * Any shadow copies of guest registers assigned to the host register will not
5279 * be flushed by this operation.
5280 */
5281DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5282{
5283 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5284 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5285 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5286 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5287 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5288}
5289
5290
5291/**
5292 * Locate a register, possibly freeing one up.
5293 *
5294 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5295 * failed.
5296 *
5297 * @returns Host register number on success. Returns UINT8_MAX if no registers
5298 * found, the caller is supposed to deal with this and raise an
5299 * allocation-type-specific status code (if desired).
5300 *
5301 * @throws VBox status code if we run into trouble spilling a variable or
5302 * recording debug info. Does NOT throw anything if we're out of
5303 * registers, though.
5304 */
5305static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5306 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5307{
5308 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5309 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5310 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5311
5312 AssertFailed();
5313
5314 /*
5315 * Try a freed register that's shadowing a guest register.
5316 */
5317 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5318 if (fRegs)
5319 {
5320 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5321
5322#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5323 /*
5324 * When we have liveness information, we use it to kick out all shadowed
5325 * guest registers that will not be needed any more in this TB. If we're
5326 * lucky, this may prevent us from ending up here again.
5327 *
5328 * Note! We must consider the previous entry here so we don't free
5329 * anything that the current threaded function requires (current
5330 * entry is produced by the next threaded function).
5331 */
5332 uint32_t const idxCurCall = pReNative->idxCurCall;
5333 if (idxCurCall > 0)
5334 {
5335 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5336
5337# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5338 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5339 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5340 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
5341#else
5342 /* Construct a mask of the registers not in the read or write state.
5343 Note! We could skip writes, if they aren't from us, as this is just
5344 a hack to prevent trashing registers that have just been written
5345 or will be written when we retire the current instruction. */
5346 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5347 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5348 & IEMLIVENESSBIT_MASK;
5349#endif
5350 /* Merge EFLAGS. */
5351 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
5352 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
5353 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
5354 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
5355 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
5356
5357 /* If it matches any shadowed registers. */
5358 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5359 {
5360 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5361 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5362 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5363
5364 /* See if we've got any unshadowed registers we can return now. */
5365 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5366 if (fUnshadowedRegs)
5367 {
5368 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5369 return (fPreferVolatile
5370 ? ASMBitFirstSetU32(fUnshadowedRegs)
5371 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5372 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5373 - 1;
5374 }
5375 }
5376 }
5377#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5378
5379 unsigned const idxReg = (fPreferVolatile
5380 ? ASMBitFirstSetU32(fRegs)
5381 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5382 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5383 - 1;
5384
5385 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5386 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5387 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5388 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5389 Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5390
5391 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5392 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5393 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5394 return idxReg;
5395 }
5396
5397 /*
5398 * Try free up a variable that's in a register.
5399 *
5400 * We do two rounds here, first evacuating variables that don't need to be
5401 * saved on the stack, then in the second round moving things to the stack.
5402 */
5403 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5404 AssertReleaseFailed(); /** @todo */
5405#if 0
5406 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5407 {
5408 uint32_t fVars = pReNative->Core.bmSimdVars;
5409 while (fVars)
5410 {
5411 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5412 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5413 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5414 && (RT_BIT_32(idxReg) & fRegMask)
5415 && ( iLoop == 0
5416 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5417 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5418 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5419 {
5420 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5421 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5422 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5423 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5424 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5425 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5426
5427 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5428 {
5429 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5430 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5431 }
5432
5433 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5434 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5435
5436 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5437 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5438 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5439 return idxReg;
5440 }
5441 fVars &= ~RT_BIT_32(idxVar);
5442 }
5443 }
5444#else
5445 RT_NOREF(poff);
5446#endif
5447
5448 return UINT8_MAX;
5449}
5450
5451
5452/**
5453 * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
5454 * SIMD register @a enmGstSimdReg.
5455 *
5456 * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any
5457 * host register before calling.
5458 */
5459DECL_FORCE_INLINE(void)
5460iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5461{
5462 Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
5463 Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5464 Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
5465
5466 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxHstSimdReg;
5467 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5468 pReNative->Core.bmGstSimdRegShadows |= RT_BIT_64(enmGstSimdReg);
5469 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxHstSimdReg);
5470#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5471 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5472 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
5473#else
5474 RT_NOREF(off);
5475#endif
5476}
5477
5478
5479/**
5480 * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
5481 * to @a idxSimdRegTo.
5482 */
5483DECL_FORCE_INLINE(void)
5484iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
5485 IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5486{
5487 Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5488 Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
5489 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
5490 == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
5491 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5492 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
5493 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
5494 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
5495 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
5496 Assert( pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
5497 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
5498
5499
5500 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
5501 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows = fGstRegShadowsFrom;
5502 if (!fGstRegShadowsFrom)
5503 {
5504 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdRegFrom);
5505 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5506 }
5507 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxSimdRegTo);
5508 pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5509 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxSimdRegTo;
5510#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5511 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5512 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
5513#else
5514 RT_NOREF(off);
5515#endif
5516}
5517
5518
5519/**
5520 * Clear any guest register shadow claims from @a idxHstSimdReg.
5521 *
5522 * The register does not need to be shadowing any guest registers.
5523 */
5524DECL_FORCE_INLINE(void)
5525iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
5526{
5527 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5528 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
5529 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5530 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
5531 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5532 Assert( !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
5533 && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
5534
5535#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5536 uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5537 if (fGstRegs)
5538 {
5539 Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
5540 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5541 while (fGstRegs)
5542 {
5543 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5544 fGstRegs &= ~RT_BIT_64(iGstReg);
5545 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
5546 }
5547 }
5548#else
5549 RT_NOREF(off);
5550#endif
5551
5552 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstSimdReg);
5553 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5554 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5555 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5556}
5557
5558
5559/**
5560 * Flushes a set of guest register shadow copies.
5561 *
5562 * This is usually done after calling a threaded function or a C-implementation
5563 * of an instruction.
5564 *
5565 * @param pReNative The native recompile state.
5566 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5567 */
5568DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5569{
5570 /*
5571 * Reduce the mask by what's currently shadowed
5572 */
5573 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5574 fGstSimdRegs &= bmGstSimdRegShadows;
5575 if (fGstSimdRegs)
5576 {
5577 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5578 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5579 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5580 if (bmGstSimdRegShadowsNew)
5581 {
5582 /*
5583 * Partial.
5584 */
5585 do
5586 {
5587 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5588 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5589 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5590 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5591 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5592 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5593
5594 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5595 fGstSimdRegs &= ~fInThisHstReg;
5596 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5597 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5598 if (!fGstRegShadowsNew)
5599 {
5600 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5601 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5602 }
5603 } while (fGstSimdRegs != 0);
5604 }
5605 else
5606 {
5607 /*
5608 * Clear all.
5609 */
5610 do
5611 {
5612 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5613 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5614 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5615 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5616 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5617 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5618
5619 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5620 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5621 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5622 } while (fGstSimdRegs != 0);
5623 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5624 }
5625 }
5626}
5627
5628
5629/**
5630 * Allocates a temporary host SIMD register.
5631 *
5632 * This may emit code to save register content onto the stack in order to free
5633 * up a register.
5634 *
5635 * @returns The host register number; throws VBox status code on failure,
5636 * so no need to check the return value.
5637 * @param pReNative The native recompile state.
5638 * @param poff Pointer to the variable with the code buffer position.
5639 * This will be updated if we need to move a variable from
5640 * register to stack in order to satisfy the request.
5641 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5642 * registers (@c true, default) or the other way around
5643 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5644 */
5645DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5646{
5647 /*
5648 * Try find a completely unused register, preferably a call-volatile one.
5649 */
5650 uint8_t idxSimdReg;
5651 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5652 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5653 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5654 if (fRegs)
5655 {
5656 if (fPreferVolatile)
5657 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5658 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5659 else
5660 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5661 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5662 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5663 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5664 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5665 }
5666 else
5667 {
5668 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5669 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5670 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5671 }
5672
5673 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5674 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5675}
5676
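/* Illustrative usage sketch (comment only, not built): callers typically pair the
   allocation with a free once the emitted code no longer needs the scratch register;
   the emitter in the middle is a hypothetical placeholder, and iemNativeSimdRegFreeTmp
   is assumed to be the SIMD counterpart of iemNativeRegFreeTmp:
       uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
       off = iemNativeEmitSomeSimdOp(pReNative, off, idxSimdRegTmp);   // hypothetical emitter
       iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
   Note that the allocation itself may emit spill code, which is why &off is passed in. */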
5677
5678/**
5679 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5680 * registers.
5681 *
5682 * @returns The host register number; throws VBox status code on failure,
5683 * so no need to check the return value.
5684 * @param pReNative The native recompile state.
5685 * @param poff Pointer to the variable with the code buffer position.
5686 * This will be updated if we need to move a variable from
5687 * register to stack in order to satisfy the request.
5688 * @param fRegMask Mask of acceptable registers.
5689 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5690 * registers (@c true, default) or the other way around
5691 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5692 */
5693DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5694 bool fPreferVolatile /*= true*/)
5695{
5696 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5697 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5698
5699 /*
5700 * Try find a completely unused register, preferably a call-volatile one.
5701 */
5702 uint8_t idxSimdReg;
5703 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5704 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5705 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5706 & fRegMask;
5707 if (fRegs)
5708 {
5709 if (fPreferVolatile)
5710 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5711 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5712 else
5713 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5714 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5715 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5716 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5717 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5718 }
5719 else
5720 {
5721 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5722 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5723 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5724 }
5725
5726 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5727 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5728}
5729
5730
5731static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5732 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5733{
5734 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5735 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == enmLoadSzDst
5736 || pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5737 {
5738# ifdef RT_ARCH_ARM64
5739 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5740 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5741# endif
5742
5743 switch (enmLoadSzDst)
5744 {
5745 case kIemNativeGstSimdRegLdStSz_256:
5746 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5747 break;
5748 case kIemNativeGstSimdRegLdStSz_Low128:
5749 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5750 break;
5751 case kIemNativeGstSimdRegLdStSz_High128:
5752 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5753 break;
5754 default:
5755 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5756 }
5757
5758 pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded = enmLoadSzDst;
5759 }
5760 else
5761 {
5762 /* Complicated stuff where the source is currently missing something, later. */
5763 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5764 }
5765
5766 return off;
5767}
5768
5769
5770/**
5771 * Allocates a temporary host SIMD register for keeping a guest
5772 * SIMD register value.
5773 *
5774 * Since we may already have a register holding the guest register value,
5775 * code will be emitted to do the loading if that's not the case. Code may also
5776 * be emitted if we have to free up a register to satisfy the request.
5777 *
5778 * @returns The host register number; throws VBox status code on failure, so no
5779 * need to check the return value.
5780 * @param pReNative The native recompile state.
5781 * @param poff Pointer to the variable with the code buffer
5782 * position. This will be updated if we need to move a
5783 * variable from register to stack in order to satisfy
5784 * the request.
5785 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5786 * @param enmIntendedUse How the caller will be using the host register.
5787 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5788 * register is okay (default). The ASSUMPTION here is
5789 * that the caller has already flushed all volatile
5790 * registers, so this is only applied if we allocate a
5791 * new register.
5792 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5793 */
5794DECL_HIDDEN_THROW(uint8_t)
5795iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5796 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5797 bool fNoVolatileRegs /*= false*/)
5798{
5799 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5800#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5801 AssertMsg( pReNative->idxCurCall == 0
5802 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5803 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5804 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5805 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5806 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5807 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5808#endif
5809#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5810 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5811#endif
5812 uint32_t const fRegMask = !fNoVolatileRegs
5813 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5814 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5815
5816 /*
5817 * First check if the guest register value is already in a host register.
5818 */
5819 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5820 {
5821 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5822 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5823 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5824 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5825
5826 /* It's not supposed to be allocated... */
5827 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5828 {
5829 /*
5830 * If the register will trash the guest shadow copy, try find a
5831 * completely unused register we can use instead. If that fails,
5832 * we need to disassociate the host reg from the guest reg.
5833 */
5834 /** @todo would be nice to know if preserving the register is in any way helpful. */
5835 /* If the purpose is calculations, try duplicate the register value as
5836 we'll be clobbering the shadow. */
5837 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5838 && ( ~pReNative->Core.bmHstSimdRegs
5839 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5840 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5841 {
5842 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5843
5844 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5845
5846 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5847 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5848 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5849 idxSimdReg = idxRegNew;
5850 }
5851 /* If the current register matches the restrictions, go ahead and allocate
5852 it for the caller. */
5853 else if (fRegMask & RT_BIT_32(idxSimdReg))
5854 {
5855 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5856 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5857 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5858 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5859 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5860 else
5861 {
5862 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5863 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5864 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5865 }
5866 }
5867 /* Otherwise, allocate a register that satisfies the caller and transfer
5868 the shadowing if compatible with the intended use. (This basically
5869 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5870 else
5871 {
5872 Assert(fNoVolatileRegs);
5873 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5874 !fNoVolatileRegs
5875 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5876 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5877 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5878 {
5879 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5880 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5881 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5882 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5883 }
5884 else
5885 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5886 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5887 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5888 idxSimdReg = idxRegNew;
5889 }
5890 }
5891 else
5892 {
5893 /*
5894 * Oops. Shadowed guest register already allocated!
5895 *
5896 * Allocate a new register, copy the value and, if updating, the
5897 * guest shadow copy assignment to the new register.
5898 */
5899 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5900 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5901 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5902 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5903
5904 /** @todo share register for readonly access. */
5905 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5906 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5907
5908 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5909 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5910 else
5911 {
5912 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
5913 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5914 }
5915
5916 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5917 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5918 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5919 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5920 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5921 else
5922 {
5923 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5924 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5925 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5926 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5927 }
5928 idxSimdReg = idxRegNew;
5929 }
5930 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5931
5932#ifdef VBOX_STRICT
5933 /* Strict builds: Check that the value is correct. */
5934 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5935#endif
5936
5937 return idxSimdReg;
5938 }
5939
5940 /*
5941 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5942 */
5943 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5944
5945 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5946 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5947 else
5948 {
5949 /** @todo This is a bit unsafe to mark the register already as loaded even though there is nothing written to it yet. */
5950 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5951 }
5952
5953 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5954 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5955
5956 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5957 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5958
5959 return idxRegNew;
5960}
5961
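/* Illustrative sketch (comment only, not built) of how the intended-use parameter is
   driven by a caller; the guest register (XMM1) and load size are arbitrary examples:
       uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
                                                                          IEMNATIVEGSTSIMDREG_SIMD(1),
                                                                          kIemNativeGstSimdRegLdStSz_Low128,
                                                                          kIemNativeGstRegUse_ForUpdate);
   ReadOnly and ForUpdate reuse or load the existing shadow copy, ForFullWrite skips the
   load since the value will be overwritten anyway, and Calculation tries to hand the
   caller a scratch duplicate so the shadow copy stays intact. */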
5962
5963/**
5964 * Emits code to flush a pending write of the given guest SIMD register, if any.
5965 *
5966 * @returns New code buffer offset.
5967 * @param pReNative The native recompile state.
5968 * @param off Current code buffer position.
5969 * @param idxGstSimdReg The guest SIMD register to flush.
5970 */
5971static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGstSimdReg)
5972{
5973 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5974
5975 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5976 g_aGstSimdShadowInfo[idxGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5977 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg),
5978 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg)));
5979
5980 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg))
5981 {
5982 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5983 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5984 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
5985 }
5986
5987 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))
5988 {
5989 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5990 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5991 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
5992 }
5993
5994 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, idxGstSimdReg);
5995 return off;
5996}
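
/* In guest-state terms the emitted code is roughly (comment only, hypothetical locals):
       if (fDirtyLo128)    // low 128 bits -> the CPUMCTX XMM area at offXmm
           memcpy((uint8_t *)pVCpu + offXmm, pu8HstRegLo128, 16);
       if (fDirtyHi128)    // high 128 bits -> the CPUMCTX YMM-hi area at offYmm
           memcpy((uint8_t *)pVCpu + offYmm, pu8HstRegHi128, 16);
   with offXmm/offYmm taken from g_aGstSimdShadowInfo[idxGstSimdReg]; the dirty bits of
   the guest register are cleared afterwards. */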
5997
5998#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5999
6000
6001
6002/*********************************************************************************************************************************
6003* Code emitters for flushing pending guest register writes and sanity checks *
6004*********************************************************************************************************************************/
6005
6006/**
6007 * Flushes delayed write of a specific guest register.
6008 *
6009 * This must be called prior to calling CImpl functions and any helpers that use
6010 * the guest state (like raising exceptions) and such.
6011 *
6012 * This optimization has not yet been implemented. The first target would be
6013 * RIP updates, since these are the most common ones.
6014 */
6015DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6016 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
6017{
6018#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6019 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
6020#endif
6021
6022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6023 if ( enmClass == kIemNativeGstRegRef_XReg
6024 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
6025 {
6026 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxReg);
6027 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
6028 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
6029
6030 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6031 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
6032 }
6033#endif
6034 RT_NOREF(pReNative, enmClass, idxReg);
6035 return off;
6036}
6037
6038
6039/**
6040 * Flushes any delayed guest register writes.
6041 *
6042 * This must be called prior to calling CImpl functions and any helpers that use
6043 * the guest state (like raising exceptions) and such.
6044 *
6045 * This optimization has not yet been implemented. The first target would be
6046 * RIP updates, since these are the most common ones.
6047 */
6048DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/,
6049 bool fFlushShadows /*= true*/)
6050{
6051#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6052 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
6053 off = iemNativeEmitPcWriteback(pReNative, off);
6054#else
6055 RT_NOREF(pReNative, fGstShwExcept);
6056#endif
6057
6058#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6059 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6060 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6061 {
6062 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6063 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6064
6065 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6066 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxGstSimdReg);
6067
6068 if ( fFlushShadows
6069 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6070 {
6071 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6072
6073 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6074 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6075 }
6076 }
6077#else
6078 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6079#endif
6080
6081 return off;
6082}
6083
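/* Illustrative call (comment only): a caller that handles the PC update itself can
   exempt it from the flush via the mask parameter:
       off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
   With the default fFlushShadows = true the guest to host SIMD shadow associations are
   dropped as well (when the SIMD register allocator is enabled). */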
6084
6085#ifdef VBOX_STRICT
6086/**
6087 * Does internal register allocator sanity checks.
6088 */
6089static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6090{
6091 /*
6092 * Iterate host registers building a guest shadowing set.
6093 */
6094 uint64_t bmGstRegShadows = 0;
6095 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6096 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6097 while (bmHstRegsWithGstShadow)
6098 {
6099 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6100 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6101 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6102
6103 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6104 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6105 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6106 bmGstRegShadows |= fThisGstRegShadows;
6107 while (fThisGstRegShadows)
6108 {
6109 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6110 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6111 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6112 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6113 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6114 }
6115 }
6116 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6117 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6118 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6119
6120 /*
6121 * Now the other way around, checking the guest to host index array.
6122 */
6123 bmHstRegsWithGstShadow = 0;
6124 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6125 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6126 while (bmGstRegShadows)
6127 {
6128 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6129 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6130 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6131
6132 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6133 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6134 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6135 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6136 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6137 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6138 }
6139 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6140 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6141 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6142}
6143#endif
6144
6145
6146/*********************************************************************************************************************************
6147* Code Emitters (larger snippets) *
6148*********************************************************************************************************************************/
6149
6150/**
6151 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6152 * extending to 64-bit width.
6153 *
6154 * @returns New code buffer offset on success, UINT32_MAX on failure.
6155 * @param pReNative The native recompile state.
6156 * @param off The current code buffer position.
6157 * @param idxHstReg The host register to load the guest register value into.
6158 * @param enmGstReg The guest register to load.
6159 *
6160 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6161 * that is something the caller needs to do if applicable.
6162 */
6163DECL_HIDDEN_THROW(uint32_t)
6164iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6165{
6166 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
6167 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6168
6169 switch (g_aGstShadowInfo[enmGstReg].cb)
6170 {
6171 case sizeof(uint64_t):
6172 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6173 case sizeof(uint32_t):
6174 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6175 case sizeof(uint16_t):
6176 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6177#if 0 /* not present in the table. */
6178 case sizeof(uint8_t):
6179 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6180#endif
6181 default:
6182 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6183 }
6184}
6185
6186
6187#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6188/**
6189 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6190 *
6191 * @returns New code buffer offset on success, UINT32_MAX on failure.
6192 * @param pReNative The recompiler state.
6193 * @param off The current code buffer position.
6194 * @param idxHstSimdReg The host register to load the guest register value into.
6195 * @param enmGstSimdReg The guest register to load.
6196 * @param enmLoadSz The load size of the register.
6197 *
6198 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6199 * that is something the caller needs to do if applicable.
6200 */
6201DECL_HIDDEN_THROW(uint32_t)
6202iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6203 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6204{
6205 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6206
6207 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
6208 switch (enmLoadSz)
6209 {
6210 case kIemNativeGstSimdRegLdStSz_256:
6211 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6212 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6213 case kIemNativeGstSimdRegLdStSz_Low128:
6214 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6215 case kIemNativeGstSimdRegLdStSz_High128:
6216 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6217 default:
6218 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6219 }
6220}
6221#endif
6222
6223#ifdef VBOX_STRICT
6224/**
6225 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6226 *
6227 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6228 * Trashes EFLAGS on AMD64.
6229 */
6230static uint32_t
6231iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6232{
6233# ifdef RT_ARCH_AMD64
6234 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6235
6236 /* rol reg64, 32 */
6237 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6238 pbCodeBuf[off++] = 0xc1;
6239 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6240 pbCodeBuf[off++] = 32;
6241
6242 /* test reg32, ffffffffh */
6243 if (idxReg >= 8)
6244 pbCodeBuf[off++] = X86_OP_REX_B;
6245 pbCodeBuf[off++] = 0xf7;
6246 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6247 pbCodeBuf[off++] = 0xff;
6248 pbCodeBuf[off++] = 0xff;
6249 pbCodeBuf[off++] = 0xff;
6250 pbCodeBuf[off++] = 0xff;
6251
6252 /* je/jz +1 */
6253 pbCodeBuf[off++] = 0x74;
6254 pbCodeBuf[off++] = 0x01;
6255
6256 /* int3 */
6257 pbCodeBuf[off++] = 0xcc;
6258
6259 /* rol reg64, 32 */
6260 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6261 pbCodeBuf[off++] = 0xc1;
6262 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6263 pbCodeBuf[off++] = 32;
6264
6265# elif defined(RT_ARCH_ARM64)
6266 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6267 /* lsr tmp0, reg64, #32 */
6268 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6269 /* cbz tmp0, +1 */
6270 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6271 /* brk #0x1100 */
6272 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6273
6274# else
6275# error "Port me!"
6276# endif
6277 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6278 return off;
6279}
6280#endif /* VBOX_STRICT */
6281
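/* What the strict-build check above verifies, expressed as C (comment only,
   breakpoint() standing in for the int3 / brk #0x1100 trap):
       if ((uReg >> 32) != 0)
           breakpoint();
   The AMD64 rol/test/rol sequence is chosen so the checked register keeps its value. */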
6282
6283#ifdef VBOX_STRICT
6284/**
6285 * Emitting code that checks that the content of register @a idxReg is the same
6286 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6287 * instruction if that's not the case.
6288 *
6289 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6290 * Trashes EFLAGS on AMD64.
6291 */
6292static uint32_t
6293iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6294{
6295# ifdef RT_ARCH_AMD64
6296 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6297
6298 /* cmp reg, [mem] */
6299 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6300 {
6301 if (idxReg >= 8)
6302 pbCodeBuf[off++] = X86_OP_REX_R;
6303 pbCodeBuf[off++] = 0x38;
6304 }
6305 else
6306 {
6307 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6308 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6309 else
6310 {
6311 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6312 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6313 else
6314 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6315 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6316 if (idxReg >= 8)
6317 pbCodeBuf[off++] = X86_OP_REX_R;
6318 }
6319 pbCodeBuf[off++] = 0x39;
6320 }
6321 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6322
6323 /* je/jz +1 */
6324 pbCodeBuf[off++] = 0x74;
6325 pbCodeBuf[off++] = 0x01;
6326
6327 /* int3 */
6328 pbCodeBuf[off++] = 0xcc;
6329
6330 /* For values smaller than the register size, we must check that the rest
6331 of the register is all zeros. */
6332 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6333 {
6334 /* test reg64, imm32 */
6335 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6336 pbCodeBuf[off++] = 0xf7;
6337 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6338 pbCodeBuf[off++] = 0;
6339 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6340 pbCodeBuf[off++] = 0xff;
6341 pbCodeBuf[off++] = 0xff;
6342
6343 /* je/jz +1 */
6344 pbCodeBuf[off++] = 0x74;
6345 pbCodeBuf[off++] = 0x01;
6346
6347 /* int3 */
6348 pbCodeBuf[off++] = 0xcc;
6349 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6350 }
6351 else
6352 {
6353 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6354 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6355 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6356 }
6357
6358# elif defined(RT_ARCH_ARM64)
6359 /* mov TMP0, [gstreg] */
6360 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6361
6362 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6363 /* sub tmp0, tmp0, idxReg */
6364 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6365 /* cbz tmp0, +1 */
6366 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6367 /* brk #0x1000+enmGstReg */
6368 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6369 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6370
6371# else
6372# error "Port me!"
6373# endif
6374 return off;
6375}
6376
6377
6378# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6379/**
6380 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6381 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6382 * instruction if that's not the case.
6383 *
6384 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6385 * Trashes EFLAGS on AMD64.
6386 */
6387static uint32_t
6388iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6389 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6390{
6391# ifdef RT_ARCH_AMD64
6392 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
6393
6394 /* movdqa vectmp0, idxSimdReg */
6395 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6396
6397 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6398
6399 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6400 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6401 if (idxSimdReg >= 8)
6402 pbCodeBuf[off++] = X86_OP_REX_R;
6403 pbCodeBuf[off++] = 0x0f;
6404 pbCodeBuf[off++] = 0x38;
6405 pbCodeBuf[off++] = 0x29;
6406 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6407
6408 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6409 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6410 pbCodeBuf[off++] = X86_OP_REX_W
6411 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6412 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6413 pbCodeBuf[off++] = 0x0f;
6414 pbCodeBuf[off++] = 0x3a;
6415 pbCodeBuf[off++] = 0x16;
6416 pbCodeBuf[off++] = 0xeb;
6417 pbCodeBuf[off++] = 0x00;
6418
6419 /* test tmp0, 0xffffffff. */
6420 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6421 pbCodeBuf[off++] = 0xf7;
6422 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6423 pbCodeBuf[off++] = 0xff;
6424 pbCodeBuf[off++] = 0xff;
6425 pbCodeBuf[off++] = 0xff;
6426 pbCodeBuf[off++] = 0xff;
6427
6428 /* je/jz +1 */
6429 pbCodeBuf[off++] = 0x74;
6430 pbCodeBuf[off++] = 0x01;
6431
6432 /* int3 */
6433 pbCodeBuf[off++] = 0xcc;
6434
6435 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6436 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6437 pbCodeBuf[off++] = X86_OP_REX_W
6438 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6439 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6440 pbCodeBuf[off++] = 0x0f;
6441 pbCodeBuf[off++] = 0x3a;
6442 pbCodeBuf[off++] = 0x16;
6443 pbCodeBuf[off++] = 0xeb;
6444 pbCodeBuf[off++] = 0x01;
6445
6446 /* test tmp0, 0xffffffff. */
6447 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6448 pbCodeBuf[off++] = 0xf7;
6449 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6450 pbCodeBuf[off++] = 0xff;
6451 pbCodeBuf[off++] = 0xff;
6452 pbCodeBuf[off++] = 0xff;
6453 pbCodeBuf[off++] = 0xff;
6454
6455 /* je/jz +1 */
6456 pbCodeBuf[off++] = 0x74;
6457 pbCodeBuf[off++] = 0x01;
6458
6459 /* int3 */
6460 pbCodeBuf[off++] = 0xcc;
6461
6462# elif defined(RT_ARCH_ARM64)
6463 /* mov vectmp0, [gstreg] */
6464 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6465
6466 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6467 {
6468 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6469 /* eor vectmp0, vectmp0, idxSimdReg */
6470 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6471 /* cnt vectmp0, vectmp0, #0*/
6472 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6473 /* umov tmp0, vectmp0.D[0] */
6474 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6475 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6476 /* cbz tmp0, +1 */
6477 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6478 /* brk #0x1000+enmGstReg */
6479 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6480 }
6481
6482 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6483 {
6484 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6485 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6486 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6487 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6488 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6489 /* umov tmp0, (vectmp0 + 1).D[0] */
6490 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6491 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6492 /* cbz tmp0, +1 */
6493 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6494 /* brk #0x1000+enmGstReg */
6495 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6496 }
6497
6498# else
6499# error "Port me!"
6500# endif
6501
6502 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6503 return off;
6504}
6505# endif
6506#endif /* VBOX_STRICT */
6507
6508
6509#ifdef VBOX_STRICT
6510/**
6511 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6512 * important bits.
6513 *
6514 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6515 * Trashes EFLAGS on AMD64.
6516 */
6517static uint32_t
6518iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6519{
6520 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6521 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6522 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6523 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6524
6525#ifdef RT_ARCH_AMD64
6526 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6527
6528 /* je/jz +1 */
6529 pbCodeBuf[off++] = 0x74;
6530 pbCodeBuf[off++] = 0x01;
6531
6532 /* int3 */
6533 pbCodeBuf[off++] = 0xcc;
6534
6535# elif defined(RT_ARCH_ARM64)
6536 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6537
6538 /* b.eq +1 */
6539 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6540 /* brk #0x2000 */
6541 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6542
6543# else
6544# error "Port me!"
6545# endif
6546 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6547
6548 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6549 return off;
6550}
6551#endif /* VBOX_STRICT */
6552
6553
6554/**
6555 * Emits code for checking the return code of a call and rcPassUp, returning
6556 * from the code if either is non-zero.
6557 */
6558DECL_HIDDEN_THROW(uint32_t)
6559iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6560{
6561#ifdef RT_ARCH_AMD64
6562 /*
6563 * AMD64: eax = call status code.
6564 */
6565
6566 /* edx = rcPassUp */
6567 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6568# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6569 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6570# endif
6571
6572 /* edx = eax | rcPassUp */
6573 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6574 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6575 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6576 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6577
6578 /* Jump to non-zero status return path. */
6579 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6580
6581 /* done. */
6582
6583#elif RT_ARCH_ARM64
6584 /*
6585 * ARM64: w0 = call status code.
6586 */
6587# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6588 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6589# endif
6590 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6591
6592 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6593
6594 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6595
6596 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6597 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6598 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6599
6600#else
6601# error "port me"
6602#endif
6603 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6604 RT_NOREF_PV(idxInstr);
6605 return off;
6606}
6607
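/* The sequence emitted above is roughly equivalent to the following C, with rcCall
   standing for the value returned in eax/w0 (comment only):
       if (((uint32_t)rcCall | (uint32_t)pVCpu->iem.s.rcPassUp) != 0)
           goto NonZeroRetOrPassUp;
   i.e. any non-zero call status or pending rcPassUp diverts to the non-zero return path. */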
6608
6609/**
6610 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6611 * raising a \#GP(0) if it isn't.
6612 *
6613 * @returns New code buffer offset, UINT32_MAX on failure.
6614 * @param pReNative The native recompile state.
6615 * @param off The code buffer offset.
6616 * @param idxAddrReg The host register with the address to check.
6617 * @param idxInstr The current instruction.
6618 */
6619DECL_HIDDEN_THROW(uint32_t)
6620iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6621{
6622 /*
6623 * Make sure we don't have any outstanding guest register writes as we may
6624 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
6625 */
6626 off = iemNativeRegFlushPendingWrites(pReNative, off);
6627
6628#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6629 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6630#else
6631 RT_NOREF(idxInstr);
6632#endif
6633
6634#ifdef RT_ARCH_AMD64
6635 /*
6636 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6637 * return raisexcpt();
6638 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6639 */
6640 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6641
6642 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6643 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6644 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6645 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6646 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6647
6648 iemNativeRegFreeTmp(pReNative, iTmpReg);
6649
6650#elif defined(RT_ARCH_ARM64)
6651 /*
6652 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6653 * return raisexcpt();
6654 * ----
6655 * mov x1, 0x800000000000
6656 * add x1, x0, x1
6657 * cmp xzr, x1, lsr 48
6658 * b.ne .Lraisexcpt
6659 */
6660 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6661
6662 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6663 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6664 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6665 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6666
6667 iemNativeRegFreeTmp(pReNative, iTmpReg);
6668
6669#else
6670# error "Port me"
6671#endif
6672 return off;
6673}
6674
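/* Why the AMD64 variant works (comment only): for a canonical 48-bit address the top
   17 bits are all equal, so the high dword (u64Addr >> 32) lies in
   [0x00000000..0x00007fff] or [0xffff8000..0xffffffff]. Adding 0x8000 with 32-bit
   wrap-around maps exactly these two ranges onto [0x0000..0xffff], so shifting right
   by 16 yields zero if and only if the address is canonical:
       uint32_t const uHi        = (uint32_t)(u64Addr >> 32);
       bool     const fCanonical = ((uint32_t)(uHi + UINT32_C(0x8000)) >> 16) == 0;
   The ARM64 variant does the same at 64-bit width by adding 0x800000000000 and
   checking that bits 63:48 of the sum are zero. */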
6675
6676/**
6677 * Emits code to check that the content of @a idxAddrReg is within the limit
6678 * of CS, raising a \#GP(0) if it isn't.
6679 *
6680 * @returns New code buffer offset; throws VBox status code on error.
6681 * @param pReNative The native recompile state.
6682 * @param off The code buffer offset.
6683 * @param idxAddrReg The host register (32-bit) with the address to
6684 * check.
6685 * @param idxInstr The current instruction.
6686 */
6687DECL_HIDDEN_THROW(uint32_t)
6688iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6689 uint8_t idxAddrReg, uint8_t idxInstr)
6690{
6691 /*
6692 * Make sure we don't have any outstanding guest register writes as we may
6693 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
6694 */
6695 off = iemNativeRegFlushPendingWrites(pReNative, off);
6696
6697#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6698 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6699#else
6700 RT_NOREF(idxInstr);
6701#endif
6702
6703 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6704 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6705 kIemNativeGstRegUse_ReadOnly);
6706
6707 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6708 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6709
6710 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6711 return off;
6712}
6713
6714
6715/**
6716 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
6717 *
6718 * @returns The flush mask.
6719 * @param fCImpl The IEM_CIMPL_F_XXX flags.
6720 * @param fGstShwFlush The starting flush mask.
6721 */
6722DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
6723{
6724 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
6725 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
6726 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
6727 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
6728 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
6729 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
6730 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
6731 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
6732 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
6733 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
6734 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
6735 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
6736 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
6737 return fGstShwFlush;
6738}
6739
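/* Worked example (comment only): a far branch that also modifies the flags, i.e.
   fCImpl = IEM_CIMPL_F_BRANCH_FAR | IEM_CIMPL_F_RFLAGS, with a zero starting mask
       uint64_t const fFlush = iemNativeCImplFlagsToGuestShadowFlushMask(  IEM_CIMPL_F_BRANCH_FAR
                                                                         | IEM_CIMPL_F_RFLAGS, 0);
   yields the bits for CS.sel, CS.base, CS.limit and EFlags, so exactly those shadow
   copies get flushed before the call. */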
6740
6741/**
6742 * Emits a call to a CImpl function or something similar.
6743 */
6744DECL_HIDDEN_THROW(uint32_t)
6745iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6746 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6747{
6748 /* Writeback everything. */
6749 off = iemNativeRegFlushPendingWrites(pReNative, off);
6750
6751 /*
6752 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6753 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6754 */
6755 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6756 fGstShwFlush
6757 | RT_BIT_64(kIemNativeGstReg_Pc)
6758 | RT_BIT_64(kIemNativeGstReg_EFlags));
6759 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6760
6761 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6762
6763 /*
6764 * Load the parameters.
6765 */
6766#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6767 /* Special-case the hidden VBOXSTRICTRC pointer. */
6768 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6769 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6770 if (cAddParams > 0)
6771 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6772 if (cAddParams > 1)
6773 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6774 if (cAddParams > 2)
6775 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6776 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6777
6778#else
6779 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6780 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6781 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6782 if (cAddParams > 0)
6783 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6784 if (cAddParams > 1)
6785 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6786 if (cAddParams > 2)
6787# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6788 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6789# else
6790 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6791# endif
6792#endif
6793
6794 /*
6795 * Make the call.
6796 */
6797 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6798
6799#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6800 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6801#endif
6802
6803 /*
6804 * Check the status code.
6805 */
6806 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6807}
6808
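/* What the emitted call roughly amounts to (comment only), leaving out the Windows
   VBOXSTRICTRC hidden-return-pointer special case:
       rcStrict = pfnCImpl(pVCpu, cbInstr, uParam0, uParam1, uParam2);
   with unused trailing parameters simply not loaded, followed by the shared
   status-code / rcPassUp check. */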
6809
6810/**
6811 * Emits a call to a threaded worker function.
6812 */
6813DECL_HIDDEN_THROW(uint32_t)
6814iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6815{
6816 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6817 off = iemNativeRegFlushPendingWrites(pReNative, off);
6818
6819 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6820 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6821
6822#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6823 /* The threaded function may throw / long jmp, so set current instruction
6824 number if we're counting. */
6825 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6826#endif
6827
6828 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6829
6830#ifdef RT_ARCH_AMD64
6831 /* Load the parameters and emit the call. */
6832# ifdef RT_OS_WINDOWS
6833# ifndef VBOXSTRICTRC_STRICT_ENABLED
6834 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6835 if (cParams > 0)
6836 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6837 if (cParams > 1)
6838 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6839 if (cParams > 2)
6840 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6841# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6842 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6843 if (cParams > 0)
6844 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6845 if (cParams > 1)
6846 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6847 if (cParams > 2)
6848 {
6849 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6850 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6851 }
6852 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6853# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6854# else
6855 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6856 if (cParams > 0)
6857 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6858 if (cParams > 1)
6859 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6860 if (cParams > 2)
6861 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6862# endif
6863
6864 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6865
6866# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6867 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6868# endif
6869
6870#elif RT_ARCH_ARM64
6871 /*
6872 * ARM64:
6873 */
6874 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6875 if (cParams > 0)
6876 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6877 if (cParams > 1)
6878 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6879 if (cParams > 2)
6880 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6881
6882 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6883
6884#else
6885# error "port me"
6886#endif
6887
6888 /*
6889 * Check the status code.
6890 */
6891 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6892
6893 return off;
6894}
6895
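/* The threaded dispatch emitted above corresponds roughly to (comment only):
       rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
                                                                      pCallEntry->auParams[0],
                                                                      pCallEntry->auParams[1],
                                                                      pCallEntry->auParams[2]);
   with only the first g_acIemThreadedFunctionUsedArgs[enmFunction] parameters actually
   loaded, followed by the same status-code / rcPassUp check as for CImpl calls. */
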
6896#ifdef VBOX_WITH_STATISTICS
6897/**
6898 * Emits code to update the threaded call statistics.
6899 */
6900DECL_INLINE_THROW(uint32_t)
6901iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6902{
6903 /*
6904 * Update threaded function stats.
6905 */
6906 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6907 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6908# if defined(RT_ARCH_ARM64)
6909 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6910 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6911 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6912 iemNativeRegFreeTmp(pReNative, idxTmp1);
6913 iemNativeRegFreeTmp(pReNative, idxTmp2);
6914# else
6915 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6916# endif
6917 return off;
6918}
6919#endif /* VBOX_WITH_STATISTICS */
6920
6921
6922/**
6923 * Emits the code at the CheckBranchMiss label.
6924 */
6925static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6926{
6927 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6928 if (idxLabel != UINT32_MAX)
6929 {
6930 iemNativeLabelDefine(pReNative, idxLabel, off);
6931
6932 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6933 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6934 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6935
6936 /* jump back to the return sequence. */
6937 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6938 }
6939 return off;
6940}
6941
6942
6943/**
6944 * Emits the code at the NeedCsLimChecking label.
6945 */
6946static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6947{
6948 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6949 if (idxLabel != UINT32_MAX)
6950 {
6951 iemNativeLabelDefine(pReNative, idxLabel, off);
6952
6953 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6954 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6955 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6956
6957 /* jump back to the return sequence. */
6958 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6959 }
6960 return off;
6961}
6962
6963
6964/**
6965 * Emits the code at the ObsoleteTb label.
6966 */
6967static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6968{
6969 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6970 if (idxLabel != UINT32_MAX)
6971 {
6972 iemNativeLabelDefine(pReNative, idxLabel, off);
6973
6974 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6975 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6976 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6977
6978 /* jump back to the return sequence. */
6979 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6980 }
6981 return off;
6982}
6983
6984
6985/**
6986 * Emits the code at the RaiseGP0 label.
6987 */
6988static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6989{
6990 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
6991 if (idxLabel != UINT32_MAX)
6992 {
6993 iemNativeLabelDefine(pReNative, idxLabel, off);
6994
6995 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
6996 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6997 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
6998
6999 /* jump back to the return sequence. */
7000 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7001 }
7002 return off;
7003}
7004
7005
7006/**
7007 * Emits the code at the RaiseNm label.
7008 */
7009static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7010{
7011 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
7012 if (idxLabel != UINT32_MAX)
7013 {
7014 iemNativeLabelDefine(pReNative, idxLabel, off);
7015
7016 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
7017 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7018 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
7019
7020 /* jump back to the return sequence. */
7021 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7022 }
7023 return off;
7024}
7025
7026
7027/**
7028 * Emits the code at the RaiseUd label.
7029 */
7030static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7031{
7032 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
7033 if (idxLabel != UINT32_MAX)
7034 {
7035 iemNativeLabelDefine(pReNative, idxLabel, off);
7036
7037 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
7038 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7039 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
7040
7041 /* jump back to the return sequence. */
7042 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7043 }
7044 return off;
7045}
7046
7047
7048/**
7049 * Emits the code at the RaiseMf label.
7050 */
7051static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7052{
7053 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
7054 if (idxLabel != UINT32_MAX)
7055 {
7056 iemNativeLabelDefine(pReNative, idxLabel, off);
7057
7058 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
7059 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7060 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
7061
7062 /* jump back to the return sequence. */
7063 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7064 }
7065 return off;
7066}
7067
7068
7069/**
7070 * Emits the code at the RaiseXf label.
7071 */
7072static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7073{
7074 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
7075 if (idxLabel != UINT32_MAX)
7076 {
7077 iemNativeLabelDefine(pReNative, idxLabel, off);
7078
7079 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
7080 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7081 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
7082
7083 /* jump back to the return sequence. */
7084 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7085 }
7086 return off;
7087}
7088
7089
7090/**
7091 * Emits the code at the ReturnWithFlags label (returns
7092 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7093 */
7094static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7095{
7096 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7097 if (idxLabel != UINT32_MAX)
7098 {
7099 iemNativeLabelDefine(pReNative, idxLabel, off);
7100
7101 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7102
7103 /* jump back to the return sequence. */
7104 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7105 }
7106 return off;
7107}
7108
7109
7110/**
7111 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7112 */
7113static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7114{
7115 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7116 if (idxLabel != UINT32_MAX)
7117 {
7118 iemNativeLabelDefine(pReNative, idxLabel, off);
7119
7120 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7121
7122 /* jump back to the return sequence. */
7123 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7124 }
7125 return off;
7126}
7127
7128
7129/**
7130 * Emits the RC fiddling code for handling a non-zero return code or rcPassUp.
7131 */
7132static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7133{
7134 /*
7135 * Generate the rc + rcPassUp fiddling code if needed.
7136 */
7137 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7138 if (idxLabel != UINT32_MAX)
7139 {
7140 iemNativeLabelDefine(pReNative, idxLabel, off);
7141
7142 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
7143#ifdef RT_ARCH_AMD64
7144# ifdef RT_OS_WINDOWS
7145# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7146 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7147# endif
7148 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7149 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7150# else
7151 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7152 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7153# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7154 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7155# endif
7156# endif
7157# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7158 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7159# endif
7160
7161#else
7162 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7163 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7164 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7165#endif
7166
7167 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7168 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7169 }
7170 return off;
7171}
7172
7173
7174/**
7175 * Emits a standard epilog.
7176 */
7177static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7178{
7179 *pidxReturnLabel = UINT32_MAX;
7180
7181 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7182 off = iemNativeRegFlushPendingWrites(pReNative, off);
7183
7184 /*
7185 * Successful return, so clear the return register (eax, w0).
7186 */
7187 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
7188
7189 /*
7190 * Define label for common return point.
7191 */
7192 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7193 *pidxReturnLabel = idxReturn;
7194
7195 /*
7196 * Restore registers and return.
7197 */
7198#ifdef RT_ARCH_AMD64
7199 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7200
7201 /* Reposition rsp at the r15 restore point. */
7202 pbCodeBuf[off++] = X86_OP_REX_W;
7203 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7204 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7205 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7206
7207 /* Pop non-volatile registers and return */
7208 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7209 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7210 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7211 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7212 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7213 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7214 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7215 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7216# ifdef RT_OS_WINDOWS
7217 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7218 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7219# endif
7220 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7221 pbCodeBuf[off++] = 0xc9; /* leave */
7222 pbCodeBuf[off++] = 0xc3; /* ret */
7223 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7224
7225#elif RT_ARCH_ARM64
7226 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7227
7228 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
7229 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7230 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7231 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7232 IEMNATIVE_FRAME_VAR_SIZE / 8);
7233 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7234 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7235 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7236 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7237 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7238 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7239 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7240 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7241 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7242 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7243 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7244 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7245
7246 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7247 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7248 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7249 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7250
7251 /* retab / ret */
7252# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7253 if (1)
7254 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7255 else
7256# endif
7257 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7258
7259#else
7260# error "port me"
7261#endif
7262 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7263
7264 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7265}
7266
7267
7268/**
7269 * Emits a standard prolog.
7270 */
7271static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7272{
7273#ifdef RT_ARCH_AMD64
7274 /*
7275 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7276 * reserving 64 bytes for stack variables plus 4 non-register argument
7277 * slots. Fixed register assignment: xBX = pVCpu;
7278 *
7279 * Since we always do the same register spilling, we can use the same
7280 * unwind description for all the code.
7281 */
7282 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7283 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7284 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7285 pbCodeBuf[off++] = 0x8b;
7286 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7287 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7288 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7289# ifdef RT_OS_WINDOWS
7290 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7291 pbCodeBuf[off++] = 0x8b;
7292 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7293 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7294 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7295# else
7296 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7297 pbCodeBuf[off++] = 0x8b;
7298 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7299# endif
7300 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7301 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7302 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7303 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7304 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7305 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7306 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7307 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7308
7309# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7310 /* Save the frame pointer. */
7311 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7312# endif
7313
7314 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7315 X86_GREG_xSP,
7316 IEMNATIVE_FRAME_ALIGN_SIZE
7317 + IEMNATIVE_FRAME_VAR_SIZE
7318 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7319 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7320 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7321 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7322 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7323
7324#elif RT_ARCH_ARM64
7325 /*
7326 * We set up a stack frame exactly like on x86, only we have to push the
7327 * return address ourselves here. We save all non-volatile registers.
7328 */
7329 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7330
7331# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement of libunwind for JIT FDEs. Investigate further, as we have been
7332 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7333 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
7334 * in any way conditional, so we just emit this instruction now and hope for the best... */
7335 /* pacibsp */
7336 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7337# endif
7338
7339 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7340 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7341 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7342 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7343 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7344 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7345 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7346 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7347 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7348 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7349 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7350 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7351 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7352 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7353 /* Save the BP and LR (ret address) registers at the top of the frame. */
7354 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7355 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7356 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7357 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7358 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7359 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7360
7361 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7362 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7363
7364 /* mov r28, r0 */
7365 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7366 /* mov r27, r1 */
7367 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7368
7369# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7370 /* Save the frame pointer. */
7371 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7372 ARMV8_A64_REG_X2);
7373# endif
7374
7375#else
7376# error "port me"
7377#endif
7378 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7379 return off;
7380}
7381
7382
7383
7384
7385/*********************************************************************************************************************************
7386* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
7387*********************************************************************************************************************************/
7388
7389#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
7390 { \
7391 Assert(pReNative->Core.bmVars == 0); \
7392 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
7393 Assert(pReNative->Core.bmStack == 0); \
7394 pReNative->fMc = (a_fMcFlags); \
7395 pReNative->fCImpl = (a_fCImplFlags); \
7396 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
7397
7398/** We have to get to the end in recompilation mode, as otherwise we won't
7399 * generate code for all the IEM_MC_IF_XXX branches. */
7400#define IEM_MC_END() \
7401 iemNativeVarFreeAll(pReNative); \
7402 } return off
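/* Illustrative sketch of how these markers are used by the generated MC blocks (the flag values
 * and statements below are hypothetical examples, not taken from this file):
 *
 *      IEM_MC_BEGIN(0, 1, IEM_MC_F_MIN_386, 0);
 *          IEM_MC_LOCAL(uint32_t, uValue);
 *          ... further IEM_MC_XXX statements recompiled into native code ...
 *      IEM_MC_END();
 *
 * IEM_MC_BEGIN asserts that the variable and stack bookkeeping is clean and records the MC and
 * CImpl flags, while IEM_MC_END frees all variables and returns the current code buffer offset. */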
7403
7404
7405
7406/*********************************************************************************************************************************
7407* Native Emitter Support. *
7408*********************************************************************************************************************************/
7409
7410
7411#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
7412
7413#define IEM_MC_NATIVE_ELSE() } else {
7414
7415#define IEM_MC_NATIVE_ENDIF() } ((void)0)
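/* A sketch of the intended usage pattern in the generated MC blocks (assumed, not taken from this
 * file; iemNativeEmit_SomeOperation is a hypothetical emitter name, RT_ARCH_VAL_XXX are the
 * architecture value constants matching RT_ARCH_VAL):
 *
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmit_SomeOperation, idxVarDst, idxVarSrc);
 *      IEM_MC_NATIVE_ELSE()
 *          ... generic IEM_MC_XXX statements as fallback ...
 *      IEM_MC_NATIVE_ENDIF();
 *
 * The IF expands to an ordinary C 'if' on the constant RT_ARCH_VAL, so only the branch matching
 * the host architecture actually emits code and the other branch is dead code to the compiler. */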
7416
7417
7418#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
7419 off = a_fnEmitter(pReNative, off)
7420
7421#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
7422 off = a_fnEmitter(pReNative, off, (a0))
7423
7424#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
7425 off = a_fnEmitter(pReNative, off, (a0), (a1))
7426
7427#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
7428 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
7429
7430#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
7431 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
7432
7433#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
7434 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
7435
7436#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
7437 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
7438
7439#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
7440 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
7441
7442#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
7443 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
7444
7445
7446
7447/*********************************************************************************************************************************
7448* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
7449*********************************************************************************************************************************/
7450
7451#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
7452 pReNative->fMc = 0; \
7453 pReNative->fCImpl = (a_fFlags); \
7454 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
7455
7456
7457#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7458 pReNative->fMc = 0; \
7459 pReNative->fCImpl = (a_fFlags); \
7460 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
7461
7462DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7463 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7464 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
7465{
7466 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
7467}
7468
7469
7470#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7471 pReNative->fMc = 0; \
7472 pReNative->fCImpl = (a_fFlags); \
7473 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7474 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
7475
7476DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7477 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7478 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
7479{
7480 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
7481}
7482
7483
7484#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7485 pReNative->fMc = 0; \
7486 pReNative->fCImpl = (a_fFlags); \
7487 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7488 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
7489
7490DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7491 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7492 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
7493 uint64_t uArg2)
7494{
7495 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
7496}
7497
7498
7499
7500/*********************************************************************************************************************************
7501* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
7502*********************************************************************************************************************************/
7503
7504/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
7505 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
7506DECL_INLINE_THROW(uint32_t)
7507iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7508{
7509 /*
7510 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
7511 * return with a special status code and make the execution loop deal with
7512 * it. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
7513 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
7514 * could continue w/o interruption, it would probably drop into the
7515 * debugger, so it's not worth the effort of trying to service it here; we
7516 * just lump it in with the handling of the others.
7517 *
7518 * To simplify the code and the register state management even more (wrt
7519 * the immediate in the AND operation), we always update the flags and skip
7520 * the conditional jump associated with the extra check.
7521 */
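 /* Rough C equivalent of what the code emitted below does (illustration only; fEFlags stands for
  * the guest EFLAGS value kept in CPUMCTX):
  *      if (fEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
  *          return VINF_IEM_REEXEC_FINISH_WITH_FLAGS;   // via the ReturnWithFlags label
  *      fEFlags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);  // written back to CPUMCTX
  */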
7522 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
7523 <= UINT32_MAX);
7524#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7525 AssertMsg( pReNative->idxCurCall == 0
7526 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
7527 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
7528#endif
7529
7530 /*
7531 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
7532 * any pending register writes must be flushed.
7533 */
7534 off = iemNativeRegFlushPendingWrites(pReNative, off);
7535
7536 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7537 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
7538 true /*fSkipLivenessAssert*/);
7539 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
7540 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
7541 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
7542 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
7543 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
7544
7545 /* Free but don't flush the EFLAGS register. */
7546 iemNativeRegFreeTmp(pReNative, idxEflReg);
7547
7548 return off;
7549}
7550
7551
7552/** Finishes the instruction with the given normal status code, doing nothing for the VINF_SUCCESS case. */
7553template<int const a_rcNormal>
7554DECL_FORCE_INLINE(uint32_t)
7555iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7556{
7557 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
7558 if (a_rcNormal != VINF_SUCCESS)
7559 {
7560#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7561 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7562#else
7563 RT_NOREF_PV(idxInstr);
7564#endif
7565
7566 /* As this code returns from the TB, any pending register writes must be flushed. */
7567 off = iemNativeRegFlushPendingWrites(pReNative, off);
7568
7569 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
7570 }
7571 return off;
7572}
7573
7574
7575#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
7576 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7577 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7578
7579#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7580 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7581 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7582 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7583
7584/** Same as iemRegAddToRip64AndFinishingNoFlags. */
7585DECL_INLINE_THROW(uint32_t)
7586iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7587{
7588#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7589# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7590 if (!pReNative->Core.offPc)
7591 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7592# endif
7593
7594 /* Allocate a temporary PC register. */
7595 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7596
7597 /* Perform the addition and store the result. */
7598 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
7599 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7600
7601 /* Free but don't flush the PC register. */
7602 iemNativeRegFreeTmp(pReNative, idxPcReg);
7603#endif
7604
7605#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7606 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7607
7608 pReNative->Core.offPc += cbInstr;
7609# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7610 off = iemNativePcAdjustCheck(pReNative, off);
7611# endif
7612 if (pReNative->cCondDepth)
7613 off = iemNativeEmitPcWriteback(pReNative, off);
7614 else
7615 pReNative->Core.cInstrPcUpdateSkipped++;
7616#endif
7617
7618 return off;
7619}
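/* Net guest-level effect of the above (sketch): pVCpu->cpum.GstCtx.rip += cbInstr. When
 * IEMNATIVE_WITH_DELAYED_PC_UPDATING is active, the addition is merely accumulated in
 * pReNative->Core.offPc and written back later via iemNativeEmitPcWriteback (immediately only
 * when inside a conditional), which is why cInstrPcUpdateSkipped is bumped in the common case. */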
7620
7621
7622#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
7623 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7624 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7625
7626#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7627 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7628 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7629 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7630
7631/** Same as iemRegAddToEip32AndFinishingNoFlags. */
7632DECL_INLINE_THROW(uint32_t)
7633iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7634{
7635#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7636# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7637 if (!pReNative->Core.offPc)
7638 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7639# endif
7640
7641 /* Allocate a temporary PC register. */
7642 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7643
7644 /* Perform the addition and store the result. */
7645 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7646 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7647
7648 /* Free but don't flush the PC register. */
7649 iemNativeRegFreeTmp(pReNative, idxPcReg);
7650#endif
7651
7652#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7653 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7654
7655 pReNative->Core.offPc += cbInstr;
7656# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7657 off = iemNativePcAdjustCheck(pReNative, off);
7658# endif
7659 if (pReNative->cCondDepth)
7660 off = iemNativeEmitPcWriteback(pReNative, off);
7661 else
7662 pReNative->Core.cInstrPcUpdateSkipped++;
7663#endif
7664
7665 return off;
7666}
7667
7668
7669#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
7670 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7671 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7672
7673#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7674 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7675 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7676 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7677
7678/** Same as iemRegAddToIp16AndFinishingNoFlags. */
7679DECL_INLINE_THROW(uint32_t)
7680iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7681{
7682#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7683# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7684 if (!pReNative->Core.offPc)
7685 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7686# endif
7687
7688 /* Allocate a temporary PC register. */
7689 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7690
7691 /* Perform the addition and store the result. */
7692 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7693 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7694 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7695
7696 /* Free but don't flush the PC register. */
7697 iemNativeRegFreeTmp(pReNative, idxPcReg);
7698#endif
7699
7700#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7701 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7702
7703 pReNative->Core.offPc += cbInstr;
7704# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7705 off = iemNativePcAdjustCheck(pReNative, off);
7706# endif
7707 if (pReNative->cCondDepth)
7708 off = iemNativeEmitPcWriteback(pReNative, off);
7709 else
7710 pReNative->Core.cInstrPcUpdateSkipped++;
7711#endif
7712
7713 return off;
7714}
7715
7716
7717
7718/*********************************************************************************************************************************
7719* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
7720*********************************************************************************************************************************/
7721
7722#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7723 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7724 (a_enmEffOpSize), pCallEntry->idxInstr); \
7725 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7726
7727#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7728 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7729 (a_enmEffOpSize), pCallEntry->idxInstr); \
7730 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7731 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7732
7733#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
7734 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7735 IEMMODE_16BIT, pCallEntry->idxInstr); \
7736 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7737
7738#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7739 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7740 IEMMODE_16BIT, pCallEntry->idxInstr); \
7741 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7742 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7743
7744#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
7745 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7746 IEMMODE_64BIT, pCallEntry->idxInstr); \
7747 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7748
7749#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7750 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7751 IEMMODE_64BIT, pCallEntry->idxInstr); \
7752 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7753 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7754
7755/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
7756 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
7757 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
7758DECL_INLINE_THROW(uint32_t)
7759iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7760 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7761{
7762 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
7763
7764 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7765 off = iemNativeRegFlushPendingWrites(pReNative, off);
7766
7767#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7768 Assert(pReNative->Core.offPc == 0);
7769
7770 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7771#endif
7772
7773 /* Allocate a temporary PC register. */
7774 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7775
7776 /* Perform the addition. */
7777 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
7778
7779 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
7780 {
7781 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
7782 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7783 }
7784 else
7785 {
7786 /* Just truncate the result to 16-bit IP. */
7787 Assert(enmEffOpSize == IEMMODE_16BIT);
7788 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7789 }
7790 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7791
7792 /* Free but don't flush the PC register. */
7793 iemNativeRegFreeTmp(pReNative, idxPcReg);
7794
7795 return off;
7796}
7797
7798
7799#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7800 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7801 (a_enmEffOpSize), pCallEntry->idxInstr); \
7802 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7803
7804#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7805 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7806 (a_enmEffOpSize), pCallEntry->idxInstr); \
7807 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7808 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7809
7810#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
7811 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7812 IEMMODE_16BIT, pCallEntry->idxInstr); \
7813 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7814
7815#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7816 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7817 IEMMODE_16BIT, pCallEntry->idxInstr); \
7818 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7819 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7820
7821#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
7822 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7823 IEMMODE_32BIT, pCallEntry->idxInstr); \
7824 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7825
7826#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7827 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7828 IEMMODE_32BIT, pCallEntry->idxInstr); \
7829 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7830 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7831
7832/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
7833 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
7834 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
7835DECL_INLINE_THROW(uint32_t)
7836iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7837 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7838{
7839 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
7840
7841 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7842 off = iemNativeRegFlushPendingWrites(pReNative, off);
7843
7844#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7845 Assert(pReNative->Core.offPc == 0);
7846
7847 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7848#endif
7849
7850 /* Allocate a temporary PC register. */
7851 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7852
7853 /* Perform the addition. */
7854 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7855
7856 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
7857 if (enmEffOpSize == IEMMODE_16BIT)
7858 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7859
7860 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
7861/** @todo we can skip this in 32-bit FLAT mode. */
7862 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7863
7864 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7865
7866 /* Free but don't flush the PC register. */
7867 iemNativeRegFreeTmp(pReNative, idxPcReg);
7868
7869 return off;
7870}
7871
7872
7873#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
7874 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7875 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7876
7877#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
7878 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7879 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7880 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7881
7882#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
7883 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7884 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7885
7886#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7887 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7888 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7889 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7890
7891#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
7892 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7893 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7894
7895#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7896 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7897 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7898 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7899
7900/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
7901DECL_INLINE_THROW(uint32_t)
7902iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7903 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
7904{
7905 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7906 off = iemNativeRegFlushPendingWrites(pReNative, off);
7907
7908#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7909 Assert(pReNative->Core.offPc == 0);
7910
7911 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7912#endif
7913
7914 /* Allocate a temporary PC register. */
7915 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7916
7917 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
7918 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7919 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7920 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7921 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7922
7923 /* Free but don't flush the PC register. */
7924 iemNativeRegFreeTmp(pReNative, idxPcReg);
7925
7926 return off;
7927}
7928
7929
7930
7931/*********************************************************************************************************************************
7932* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
7933*********************************************************************************************************************************/
7934
7935/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
7936#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
7937 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7938
7939/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
7940#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
7941 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7942
7943/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
7944#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
7945 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7946
7947/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
7948 * clears flags. */
7949#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
7950 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
7951 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7952
7953/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
7954 * clears flags. */
7955#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
7956 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
7957 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7958
7959/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
7960 * clears flags. */
7961#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
7962 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
7963 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7964
7965#undef IEM_MC_SET_RIP_U16_AND_FINISH
7966
7967
7968/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
7969#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
7970 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7971
7972/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
7973#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
7974 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7975
7976/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
7977 * clears flags. */
7978#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
7979 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
7980 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7981
7982/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
7983 * and clears flags. */
7984#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
7985 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
7986 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7987
7988#undef IEM_MC_SET_RIP_U32_AND_FINISH
7989
7990
7991/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
7992#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
7993 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
7994
7995/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
7996 * and clears flags. */
7997#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
7998 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
7999 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8000
8001#undef IEM_MC_SET_RIP_U64_AND_FINISH
8002
8003
8004/** Same as iemRegRipJumpU16AndFinishNoFlags,
8005 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
8006DECL_INLINE_THROW(uint32_t)
8007iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
8008 uint8_t idxInstr, uint8_t cbVar)
8009{
8010 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
8011 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
8012
8013 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
8014 off = iemNativeRegFlushPendingWrites(pReNative, off);
8015
8016#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8017 Assert(pReNative->Core.offPc == 0);
8018
8019 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
8020#endif
8021
8022 /* Get a register with the new PC loaded from idxVarPc.
8023 Note! This ASSUMES that the high bits of the GPR are zeroed. */
8024 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
8025
8026 /* Check limit (may #GP(0) + exit TB). */
8027 if (!f64Bit)
8028/** @todo we can skip this test in FLAT 32-bit mode. */
8029 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
8030 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
8031 else if (cbVar > sizeof(uint32_t))
8032 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
8033
8034 /* Store the result. */
8035 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
8036
8037 iemNativeVarRegisterRelease(pReNative, idxVarPc);
8038 /** @todo implicitly free the variable? */
8039
8040 return off;
8041}
8042
8043
8044
8045/*********************************************************************************************************************************
8046* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
8047*********************************************************************************************************************************/
8048
8049#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
8050 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
8051
8052/**
8053 * Emits code to check if a \#NM exception should be raised.
8054 *
8055 * @returns New code buffer offset, UINT32_MAX on failure.
8056 * @param pReNative The native recompile state.
8057 * @param off The code buffer offset.
8058 * @param idxInstr The current instruction.
8059 */
8060DECL_INLINE_THROW(uint32_t)
8061iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8062{
8063 /*
8064 * Make sure we don't have any outstanding guest register writes as we may
8065 * raise an #NM and all guest registers must be up to date in CPUMCTX.
8066 *
8067 * @todo r=aeichner Can we postpone this to the RaiseNm path?
8068 */
8069 off = iemNativeRegFlushPendingWrites(pReNative, off);
8070
8071#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8072 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8073#else
8074 RT_NOREF(idxInstr);
8075#endif
8076
8077 /* Allocate a temporary CR0 register. */
8078 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8079 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8080
8081 /*
8082 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
8083 * return raisexcpt();
8084 */
8085 /* Test and jump. */
8086 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
8087
8088 /* Free but don't flush the CR0 register. */
8089 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8090
8091 return off;
8092}
8093
8094
8095#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
8096 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
8097
8098/**
8099 * Emits code to check if a \#MF exception should be raised.
8100 *
8101 * @returns New code buffer offset, UINT32_MAX on failure.
8102 * @param pReNative The native recompile state.
8103 * @param off The code buffer offset.
8104 * @param idxInstr The current instruction.
8105 */
8106DECL_INLINE_THROW(uint32_t)
8107iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8108{
8109 /*
8110 * Make sure we don't have any outstanding guest register writes as we may
8111 * raise an #MF and all guest registers must be up to date in CPUMCTX.
8112 *
8113 * @todo r=aeichner Can we postpone this to the RaiseMf path?
8114 */
8115 off = iemNativeRegFlushPendingWrites(pReNative, off);
8116
8117#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8118 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8119#else
8120 RT_NOREF(idxInstr);
8121#endif
8122
8123 /* Allocate a temporary FSW register. */
8124 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
8125 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
8126
8127 /*
8128 * if ((FSW & X86_FSW_ES) != 0)
8129 * return raisexcpt();
8130 */
8131 /* Test and jump. */
8132 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
8133
8134 /* Free but don't flush the FSW register. */
8135 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
8136
8137 return off;
8138}
8139
8140
8141#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
8142 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8143
8144/**
8145 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
8146 *
8147 * @returns New code buffer offset, UINT32_MAX on failure.
8148 * @param pReNative The native recompile state.
8149 * @param off The code buffer offset.
8150 * @param idxInstr The current instruction.
8151 */
8152DECL_INLINE_THROW(uint32_t)
8153iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8154{
8155 /*
8156 * Make sure we don't have any outstanding guest register writes as we may
8157 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8158 *
8159 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8160 */
8161 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8162
8163#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8164 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8165#else
8166 RT_NOREF(idxInstr);
8167#endif
8168
8169 /* Allocate a temporary CR0 and CR4 register. */
8170 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8171 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8172 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8173 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8174
8175 /** @todo r=aeichner Optimize this more later to have fewer compares and branches
8176 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
8177 * actual performance benefit first). */
8178 /*
8179 * if (cr0 & X86_CR0_EM)
8180 * return raisexcpt();
8181 */
8182 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_EM_BIT, idxLabelRaiseUd);
8183 /*
8184 * if (!(cr4 & X86_CR4_OSFXSR))
8185 * return raisexcpt();
8186 */
8187 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR_BIT, idxLabelRaiseUd);
8188 /*
8189 * if (cr0 & X86_CR0_TS)
8190 * return raisexcpt();
8191 */
8192 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8193
8194 /* Free but don't flush the CR0 and CR4 registers. */
8195 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8196 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8197
8198 return off;
8199}
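
/* Illustration only: architecturally, the three test-and-branch sequences above
   correspond to this check order (approximate pseudo-C; the helper names are
   assumptions, not necessarily the exact IEM ones):
   @code
        if (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)         // CR0.EM set: no SSE
            return iemRaiseUndefinedOpcode(pVCpu);       // -> #UD
        if (!(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))  // OS hasn't enabled FXSAVE/SSE
            return iemRaiseUndefinedOpcode(pVCpu);       // -> #UD
        if (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS)         // lazy FPU state switch pending
            return iemRaiseDeviceNotAvailable(pVCpu);    // -> #NM
   @endcode */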
8200
8201
8202#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
8203 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8204
8205/**
8206 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
8207 *
8208 * @returns New code buffer offset, UINT32_MAX on failure.
8209 * @param pReNative The native recompile state.
8210 * @param off The code buffer offset.
8211 * @param idxInstr The current instruction.
8212 */
8213DECL_INLINE_THROW(uint32_t)
8214iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8215{
8216 /*
8217 * Make sure we don't have any outstanding guest register writes as we may
8218 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8219 *
8220 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8221 */
8222 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8223
8224#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8225 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8226#else
8227 RT_NOREF(idxInstr);
8228#endif
8229
8230 /* Allocate temporary CR0, CR4 and XCR0 registers. */
8231 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8232 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8233 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
8234 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8235 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8236
8237 /** @todo r=aeichner Optimize this more later to have fewer compares and branches
8238 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h but check that it has some
8239 * actual performance benefit first). */
8240 /*
8241 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
8242 * return raisexcpt();
8243 */
8244 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
8245 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
8246 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
8247 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8248
8249 /*
8250 * if (!(cr4 & X86_CR4_OSXSAVE))
8251 * return raisexcpt();
8252 */
8253 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT, idxLabelRaiseUd);
8254 /*
8255 * if (cr0 & X86_CR0_TS)
8256 * return raisexcpt();
8257 */
8258 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8259
8260 /* Free but don't flush the CR0, CR4 and XCR0 registers. */
8261 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8262 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8263 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
8264
8265 return off;
8266}
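
/* Illustration only: the AVX availability checks above correspond roughly to
   (approximate pseudo-C; field/helper names are assumptions):
   @code
        if (   (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
            != (XSAVE_C_YMM | XSAVE_C_SSE))              // YMM+SSE state not enabled in XCR0
            return iemRaiseUndefinedOpcode(pVCpu);       // -> #UD
        if (!(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)) // OS hasn't enabled XSAVE
            return iemRaiseUndefinedOpcode(pVCpu);       // -> #UD
        if (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS)         // lazy state switch pending
            return iemRaiseDeviceNotAvailable(pVCpu);    // -> #NM
   @endcode
   The XCR0 test is what the temporary immediate register and the AND + compare
   at the top of the emitter implement. */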
8267
8268
8269#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
8270 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
8271
8272/**
8273 * Emits code to raise a SIMD floating point exception (either \#UD or \#XF).
8274 *
8275 * @returns New code buffer offset, UINT32_MAX on failure.
8276 * @param pReNative The native recompile state.
8277 * @param off The code buffer offset.
8278 * @param idxInstr The current instruction.
8279 */
8280DECL_INLINE_THROW(uint32_t)
8281iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8282{
8283 /*
8284 * Make sure we don't have any outstanding guest register writes as we may
8285 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
8286 *
8287 * @todo r=aeichner Can we postpone this to the RaiseXf/RaiseUd path?
8288 */
8289 off = iemNativeRegFlushPendingWrites(pReNative, off);
8290
8291#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8292 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8293#else
8294 RT_NOREF(idxInstr);
8295#endif
8296
8297 /* Allocate a temporary CR4 register. */
8298 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8299 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
8300 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8301
8302 /*
8303 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
8304 * return raisexcpt();
8305 */
8306 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
8307
8308 /* raise \#UD exception unconditionally. */
8309 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
8310
8311 /* Free but don't flush the CR4 register. */
8312 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8313
8314 return off;
8315}
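
/* Illustration only: CR4.OSXMMEXCPT decides which exception an unmasked SSE
   floating-point fault becomes; with the bit set the CPU delivers \#XF, without
   it the instruction gets \#UD. Roughly (helper names are assumptions):
   @code
        if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
            return iemRaiseSimdFpException(pVCpu);       // -> #XF
        return iemRaiseUndefinedOpcode(pVCpu);           // -> #UD
   @endcode
   The emitter above encodes this decision as one bit test on the shadowed CR4
   copy plus an unconditional jump to the other raise label. */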
8316
8317
8318
8319/*********************************************************************************************************************************
8320* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
8321*********************************************************************************************************************************/
8322
8323/**
8324 * Pushes an IEM_MC_IF_XXX onto the condition stack.
8325 *
8326 * @returns Pointer to the condition stack entry.
8327 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if too deeply nested.
8328 */
8329DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
8330{
8331#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8332 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
8333#endif
8334
8335 uint32_t const idxStack = pReNative->cCondDepth;
8336 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
8337
8338 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
8339 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
8340
8341 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
8342 pEntry->fInElse = false;
8343 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
8344 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
8345
8346 return pEntry;
8347}
8348
8349
8350/**
8351 * Start of the if-block, snapshotting the register and variable state.
8352 */
8353DECL_INLINE_THROW(void)
8354iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
8355{
8356 Assert(offIfBlock != UINT32_MAX);
8357 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8358 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8359 Assert(!pEntry->fInElse);
8360
8361 /* Define the start of the IF block if requested or for disassembly purposes. */
8362 if (idxLabelIf != UINT32_MAX)
8363 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
8364#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8365 else
8366 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
8367#else
8368 RT_NOREF(offIfBlock);
8369#endif
8370
8371#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8372 Assert(pReNative->Core.offPc == 0);
8373#endif
8374
8375 /* Copy the initial state so we can restore it in the 'else' block. */
8376 pEntry->InitialState = pReNative->Core;
8377}
8378
8379
8380#define IEM_MC_ELSE() } while (0); \
8381 off = iemNativeEmitElse(pReNative, off); \
8382 do {
8383
8384/** Emits code related to IEM_MC_ELSE. */
8385DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8386{
8387 /* Check sanity and get the conditional stack entry. */
8388 Assert(off != UINT32_MAX);
8389 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8390 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8391 Assert(!pEntry->fInElse);
8392
8393 /* Jump to the endif */
8394 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
8395
8396 /* Define the else label and enter the else part of the condition. */
8397 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8398 pEntry->fInElse = true;
8399
8400#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8401 Assert(pReNative->Core.offPc == 0);
8402#endif
8403
8404 /* Snapshot the core state so we can do a merge at the endif and restore
8405 the snapshot we took at the start of the if-block. */
8406 pEntry->IfFinalState = pReNative->Core;
8407 pReNative->Core = pEntry->InitialState;
8408
8409 return off;
8410}
8411
8412
8413#define IEM_MC_ENDIF() } while (0); \
8414 off = iemNativeEmitEndIf(pReNative, off)
8415
8416/** Emits code related to IEM_MC_ENDIF. */
8417DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8418{
8419 /* Check sanity and get the conditional stack entry. */
8420 Assert(off != UINT32_MAX);
8421 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8422 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8423
8424#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8425 Assert(pReNative->Core.offPc == 0);
8426#endif
8427
8428 /*
8429 * Now we have to find common ground with the core state at the other end of the
8430 * condition (if-block final state or the initial state).  Use the smallest common
8431 * denominator and just drop anything that isn't the same in both states.
8432 */
8433 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
8434 * which is why we're doing this at the end of the else-block.
8435 * But we'd need more info about the future for that to be worth the effort. */
8436 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
8437 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
8438 {
8439 /* shadow guest stuff first. */
8440 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
8441 if (fGstRegs)
8442 {
8443 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
8444 do
8445 {
8446 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
8447 fGstRegs &= ~RT_BIT_64(idxGstReg);
8448
8449 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
8450 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
8451 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
8452 {
8453 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
8454 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
8455 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
8456 }
8457 } while (fGstRegs);
8458 }
8459 else
8460 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
8461
8462 /* Check variables next. For now we must require them to be identical
8463 or stuff we can recreate. */
8464 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
8465 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
8466 if (fVars)
8467 {
8468 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
8469 do
8470 {
8471 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
8472 fVars &= ~RT_BIT_32(idxVar);
8473
8474 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
8475 {
8476 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
8477 continue;
8478 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
8479 {
8480 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8481 if (idxHstReg != UINT8_MAX)
8482 {
8483 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8484 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8485 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
8486 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8487 }
8488 continue;
8489 }
8490 }
8491 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
8492 continue;
8493
8494 /* Irreconcilable, so drop it. */
8495 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8496 if (idxHstReg != UINT8_MAX)
8497 {
8498 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8499 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8500 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
8501 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8502 }
8503 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8504 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8505 } while (fVars);
8506 }
8507
8508 /* Finally, check that the host register allocations match. */
8509 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
8510 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
8511 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
8512 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
8513 }
8514
8515 /*
8516 * Define the endif label and maybe the else one if we're still in the 'if' part.
8517 */
8518 if (!pEntry->fInElse)
8519 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8520 else
8521 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
8522 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
8523
8524 /* Pop the conditional stack. */
8525 pReNative->cCondDepth -= 1;
8526
8527 return off;
8528}
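
/* Illustration only: how the IEM_MC_ELSE/IEM_MC_ENDIF macros above (and the
   IEM_MC_IF_XXX ones below) fit together. An MC block written as
   @code
        IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
            // then-statements
        } IEM_MC_ELSE() {
            // else-statements
        } IEM_MC_ENDIF();
   @endcode
   expands roughly to
   @code
        off = iemNativeEmitIfEflagsBitSet(pReNative, off, (X86_EFL_ZF)); do { {
            // then-statements
        } } while (0); off = iemNativeEmitElse(pReNative, off); do { {
            // else-statements
        } } while (0); off = iemNativeEmitEndIf(pReNative, off);
   @endcode
   so each emitter is called exactly once per IF/ELSE/ENDIF and operates on the
   condition stack entry pushed by iemNativeCondPushIf. */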
8529
8530
8531#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
8532 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
8533 do {
8534
8535/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
8536DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8537{
8538 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8539
8540 /* Get the eflags. */
8541 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8542 kIemNativeGstRegUse_ReadOnly);
8543
8544 /* Test and jump. */
8545 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8546
8547 /* Free but don't flush the EFlags register. */
8548 iemNativeRegFreeTmp(pReNative, idxEflReg);
8549
8550 /* Make a copy of the core state now as we start the if-block. */
8551 iemNativeCondStartIfBlock(pReNative, off);
8552
8553 return off;
8554}
8555
8556
8557#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
8558 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
8559 do {
8560
8561/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
8562DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8563{
8564 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8565
8566 /* Get the eflags. */
8567 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8568 kIemNativeGstRegUse_ReadOnly);
8569
8570 /* Test and jump. */
8571 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8572
8573 /* Free but don't flush the EFlags register. */
8574 iemNativeRegFreeTmp(pReNative, idxEflReg);
8575
8576 /* Make a copy of the core state now as we start the if-block. */
8577 iemNativeCondStartIfBlock(pReNative, off);
8578
8579 return off;
8580}
8581
8582
8583#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
8584 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
8585 do {
8586
8587/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
8588DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8589{
8590 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8591
8592 /* Get the eflags. */
8593 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8594 kIemNativeGstRegUse_ReadOnly);
8595
8596 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8597 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8598
8599 /* Test and jump. */
8600 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8601
8602 /* Free but don't flush the EFlags register. */
8603 iemNativeRegFreeTmp(pReNative, idxEflReg);
8604
8605 /* Make a copy of the core state now as we start the if-block. */
8606 iemNativeCondStartIfBlock(pReNative, off);
8607
8608 return off;
8609}
8610
8611
8612#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
8613 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
8614 do {
8615
8616/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
8617DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8618{
8619 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8620
8621 /* Get the eflags. */
8622 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8623 kIemNativeGstRegUse_ReadOnly);
8624
8625 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8626 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8627
8628 /* Test and jump. */
8629 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8630
8631 /* Free but don't flush the EFlags register. */
8632 iemNativeRegFreeTmp(pReNative, idxEflReg);
8633
8634 /* Make a copy of the core state now as we start the if-block. */
8635 iemNativeCondStartIfBlock(pReNative, off);
8636
8637 return off;
8638}
8639
8640
8641#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
8642 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
8643 do {
8644
8645#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
8646 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
8647 do {
8648
8649/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
8650DECL_INLINE_THROW(uint32_t)
8651iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8652 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8653{
8654 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8655
8656 /* Get the eflags. */
8657 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8658 kIemNativeGstRegUse_ReadOnly);
8659
8660 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8661 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8662
8663 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8664 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8665 Assert(iBitNo1 != iBitNo2);
8666
8667#ifdef RT_ARCH_AMD64
8668 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
8669
8670 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8671 if (iBitNo1 > iBitNo2)
8672 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8673 else
8674 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8675 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8676
8677#elif defined(RT_ARCH_ARM64)
8678 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8679 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8680
8681 /* and tmpreg, eflreg, #1<<iBitNo1 */
8682 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8683
8684 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8685 if (iBitNo1 > iBitNo2)
8686 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8687 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8688 else
8689 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8690 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8691
8692 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8693
8694#else
8695# error "Port me"
8696#endif
8697
8698 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8699 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8700 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8701
8702 /* Free but don't flush the EFlags and tmp registers. */
8703 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8704 iemNativeRegFreeTmp(pReNative, idxEflReg);
8705
8706 /* Make a copy of the core state now as we start the if-block. */
8707 iemNativeCondStartIfBlock(pReNative, off);
8708
8709 return off;
8710}
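
/* Illustration only: the AND/shift/XOR sequence above is the classic trick for
   comparing two flag bits without extracting each one separately. A plain C
   model of what the emitted code computes (names are local to this sketch):
   @code
        static bool iemDemoEflBitsDiffer(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2)
        {
            uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);   // isolate bit #1
            if (iBitNo1 > iBitNo2)                       // shift it to bit #2's position
                uTmp >>= iBitNo1 - iBitNo2;
            else
                uTmp <<= iBitNo2 - iBitNo1;
            uTmp ^= fEfl;                                // bit #2 of uTmp = bit1 ^ bit2
            return RT_BOOL(uTmp & RT_BIT_32(iBitNo2));   // set when the two bits differ
        }
   @endcode
   Testing bit #2 of the temporary thus jumps to the else-label when the bits
   differ (EQ variant) or when they match (NE variant, fInverted). */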
8711
8712
8713#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
8714 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
8715 do {
8716
8717#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
8718 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
8719 do {
8720
8721/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
8722 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
8723DECL_INLINE_THROW(uint32_t)
8724iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
8725 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8726{
8727 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8728
8729 /* We need an if-block label for the non-inverted variant. */
8730 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
8731 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
8732
8733 /* Get the eflags. */
8734 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8735 kIemNativeGstRegUse_ReadOnly);
8736
8737 /* Translate the flag masks to bit numbers. */
8738 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8739 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8740
8741 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8742 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8743 Assert(iBitNo1 != iBitNo);
8744
8745 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8746 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8747 Assert(iBitNo2 != iBitNo);
8748 Assert(iBitNo2 != iBitNo1);
8749
8750#ifdef RT_ARCH_AMD64
8751 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
8752#elif defined(RT_ARCH_ARM64)
8753 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8754#endif
8755
8756 /* Check for the lone bit first. */
8757 if (!fInverted)
8758 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8759 else
8760 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
8761
8762 /* Then extract and compare the other two bits. */
8763#ifdef RT_ARCH_AMD64
8764 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8765 if (iBitNo1 > iBitNo2)
8766 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8767 else
8768 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8769 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8770
8771#elif defined(RT_ARCH_ARM64)
8772 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8773
8774 /* and tmpreg, eflreg, #1<<iBitNo1 */
8775 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8776
8777 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8778 if (iBitNo1 > iBitNo2)
8779 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8780 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8781 else
8782 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8783 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8784
8785 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8786
8787#else
8788# error "Port me"
8789#endif
8790
8791 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8792 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8793 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8794
8795 /* Free but don't flush the EFlags and tmp registers. */
8796 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8797 iemNativeRegFreeTmp(pReNative, idxEflReg);
8798
8799 /* Make a copy of the core state now as we start the if-block. */
8800 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
8801
8802 return off;
8803}
8804
8805
8806#define IEM_MC_IF_CX_IS_NZ() \
8807 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
8808 do {
8809
8810/** Emits code for IEM_MC_IF_CX_IS_NZ. */
8811DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8812{
8813 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8814
8815 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8816 kIemNativeGstRegUse_ReadOnly);
8817 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
8818 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8819
8820 iemNativeCondStartIfBlock(pReNative, off);
8821 return off;
8822}
8823
8824
8825#define IEM_MC_IF_ECX_IS_NZ() \
8826 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
8827 do {
8828
8829#define IEM_MC_IF_RCX_IS_NZ() \
8830 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
8831 do {
8832
8833/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
8834DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8835{
8836 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8837
8838 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8839 kIemNativeGstRegUse_ReadOnly);
8840 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
8841 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8842
8843 iemNativeCondStartIfBlock(pReNative, off);
8844 return off;
8845}
8846
8847
8848#define IEM_MC_IF_CX_IS_NOT_ONE() \
8849 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
8850 do {
8851
8852/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
8853DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8854{
8855 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8856
8857 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8858 kIemNativeGstRegUse_ReadOnly);
8859#ifdef RT_ARCH_AMD64
8860 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8861#else
8862 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8863 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8864 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8865#endif
8866 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8867
8868 iemNativeCondStartIfBlock(pReNative, off);
8869 return off;
8870}
8871
8872
8873#define IEM_MC_IF_ECX_IS_NOT_ONE() \
8874 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
8875 do {
8876
8877#define IEM_MC_IF_RCX_IS_NOT_ONE() \
8878 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
8879 do {
8880
8881/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
8882DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8883{
8884 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8885
8886 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8887 kIemNativeGstRegUse_ReadOnly);
8888 if (f64Bit)
8889 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8890 else
8891 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8892 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8893
8894 iemNativeCondStartIfBlock(pReNative, off);
8895 return off;
8896}
8897
8898
8899#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8900 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
8901 do {
8902
8903#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8904 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
8905 do {
8906
8907/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
8908 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8909DECL_INLINE_THROW(uint32_t)
8910iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
8911{
8912 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8913
8914 /* We have to load both RCX and EFLAGS before we can start branching,
8915 otherwise we'll end up in the else-block with an inconsistent
8916 register allocator state.
8917 Doing EFLAGS first as it's more likely to be loaded, right? */
8918 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8919 kIemNativeGstRegUse_ReadOnly);
8920 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8921 kIemNativeGstRegUse_ReadOnly);
8922
8923 /** @todo we could reduce this to a single branch instruction by spending a
8924 * temporary register and some setnz stuff. Not sure if loops are
8925 * worth it. */
8926 /* Check CX. */
8927#ifdef RT_ARCH_AMD64
8928 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8929#else
8930 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8931 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8932 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8933#endif
8934
8935 /* Check the EFlags bit. */
8936 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8937 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8938 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8939 !fCheckIfSet /*fJmpIfSet*/);
8940
8941 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8942 iemNativeRegFreeTmp(pReNative, idxEflReg);
8943
8944 iemNativeCondStartIfBlock(pReNative, off);
8945 return off;
8946}
8947
8948
8949#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8950 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
8951 do {
8952
8953#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8954 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
8955 do {
8956
8957#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8958 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
8959 do {
8960
8961#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8962 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
8963 do {
8964
8965/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
8966 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
8967 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
8968 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8969DECL_INLINE_THROW(uint32_t)
8970iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8971 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
8972{
8973 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8974
8975 /* We have to load both RCX and EFLAGS before we can start branching,
8976 otherwise we'll end up in the else-block with an inconsistent
8977 register allocator state.
8978 Doing EFLAGS first as it's more likely to be loaded, right? */
8979 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8980 kIemNativeGstRegUse_ReadOnly);
8981 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8982 kIemNativeGstRegUse_ReadOnly);
8983
8984 /** @todo we could reduce this to a single branch instruction by spending a
8985 * temporary register and some setnz stuff. Not sure if loops are
8986 * worth it. */
8987 /* Check RCX/ECX. */
8988 if (f64Bit)
8989 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8990 else
8991 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8992
8993 /* Check the EFlags bit. */
8994 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8995 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8996 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8997 !fCheckIfSet /*fJmpIfSet*/);
8998
8999 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
9000 iemNativeRegFreeTmp(pReNative, idxEflReg);
9001
9002 iemNativeCondStartIfBlock(pReNative, off);
9003 return off;
9004}
9005
9006
9007
9008/*********************************************************************************************************************************
9009* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
9010*********************************************************************************************************************************/
9011/** Number of hidden arguments for CIMPL calls.
9012 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
9013#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9014# define IEM_CIMPL_HIDDEN_ARGS 3
9015#else
9016# define IEM_CIMPL_HIDDEN_ARGS 2
9017#endif
9018
9019#define IEM_MC_NOREF(a_Name) \
9020 RT_NOREF_PV(a_Name)
9021
9022#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
9023 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
9024
9025#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
9026 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
9027
9028#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
9029 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
9030
9031#define IEM_MC_LOCAL(a_Type, a_Name) \
9032 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
9033
9034#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
9035 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
9036
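/* Illustration only: a typical MC block combines these allocators along these
   lines (shape only, not an actual instruction body; names and argument types
   are made up for the example):
   @code
        IEM_MC_LOCAL(uint16_t,           u16Value);
        IEM_MC_ARG(uint16_t *,           pu16Dst,            0);
        IEM_MC_ARG(uint16_t,             u16Src,             1);
        IEM_MC_ARG_LOCAL_REF(uint16_t *, pu16Tmp, u16Value,  2);
        IEM_MC_ARG_CONST(uint32_t,       fEflIn,  0,         3);
   @endcode
   Each name becomes a uint8_t packed variable index into pReNative->Core.aVars,
   and the argument numbers get biased by the hidden-argument count returned by
   iemNativeArgGetHiddenArgCount() below. */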
9037
9038/**
9039 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
9040 */
9041DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
9042{
9043 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
9044 return IEM_CIMPL_HIDDEN_ARGS;
9045 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
9046 return 1;
9047 return 0;
9048}
9049
9050
9051/**
9052 * Internal work that allocates a variable with kind set to
9053 * kIemNativeVarKind_Invalid and no current stack allocation.
9054 *
9055 * The kind will either be set by the caller or later when the variable is first
9056 * assigned a value.
9057 *
9058 * @returns Unpacked index.
9059 * @internal
9060 */
9061static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9062{
9063 Assert(cbType > 0 && cbType <= 64);
9064 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
9065 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
9066 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
9067 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9068 pReNative->Core.aVars[idxVar].cbVar = cbType;
9069 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9070 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9071 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
9072 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
9073 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
9074 pReNative->Core.aVars[idxVar].fRegAcquired = false;
9075 pReNative->Core.aVars[idxVar].u.uValue = 0;
9076 return idxVar;
9077}
9078
9079
9080/**
9081 * Internal work that allocates an argument variable w/o setting enmKind.
9082 *
9083 * @returns Unpacked index.
9084 * @internal
9085 */
9086static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9087{
9088 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
9089 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9090 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
9091
9092 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
9093 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
9094 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
9095 return idxVar;
9096}
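
/* Illustration only: what the iArgNo biasing above means for a CIMPL-calling
   block with the usual two hidden arguments (sketch, mirroring the statements
   in iemNativeArgAllocInt):
   @code
        // IEM_MC_ARG(uint16_t, u16Src, 1):
        iArgNo = 1 + IEM_CIMPL_HIDDEN_ARGS;       // = 3
        pReNative->Core.aidxArgVars[3] = idxVar;  // (unpacked index)
        pReNative->Core.aVars[idxVar].uArgNo = 3; // i.e. the 4th call register
   @endcode
   On Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED there are three hidden
   arguments, shifting every explicit argument one register further. */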
9097
9098
9099/**
9100 * Gets the stack slot for a stack variable, allocating one if necessary.
9101 *
9102 * Calling this function implies that the stack slot will contain a valid
9103 * variable value. The caller deals with any register currently assigned to the
9104 * variable, typically by spilling it into the stack slot.
9105 *
9106 * @returns The stack slot number.
9107 * @param pReNative The recompiler state.
9108 * @param idxVar The variable.
9109 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
9110 */
9111DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9112{
9113 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9114 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9115 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9116
9117 /* Already got a slot? */
9118 uint8_t const idxStackSlot = pVar->idxStackSlot;
9119 if (idxStackSlot != UINT8_MAX)
9120 {
9121 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
9122 return idxStackSlot;
9123 }
9124
9125 /*
9126 * A single slot is easy to allocate.
9127 * Allocate them from the top end, closest to BP, to reduce the displacement.
9128 */
9129 if (pVar->cbVar <= sizeof(uint64_t))
9130 {
9131 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9132 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9133 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
9134 pVar->idxStackSlot = (uint8_t)iSlot;
9135 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
9136 return (uint8_t)iSlot;
9137 }
9138
9139 /*
9140 * We need more than one stack slot.
9141 *
9142 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
9143 */
9144 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
9145 Assert(pVar->cbVar <= 64);
9146 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
9147 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
9148 uint32_t bmStack = ~pReNative->Core.bmStack;
9149 while (bmStack != UINT32_MAX)
9150 {
9151/** @todo allocate from the top to reduce BP displacement. */
9152 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
9153 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9154 if (!(iSlot & fBitAlignMask))
9155 {
9156 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
9157 {
9158 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
9159 pVar->idxStackSlot = (uint8_t)iSlot;
9160 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9161 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
9162 return (uint8_t)iSlot;
9163 }
9164 }
9165 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
9166 }
9167 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9168}
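
/* Illustration only: worked example of the multi-slot masks above for a
   32 byte variable:
   @code
        // cbVar         = 32
        // fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = RT_BIT_32(2) - 1 = 3
        // fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1            = 0xf (four 8-byte slots)
   @endcode
   So the scan looks for a slot index with the low two bits clear (a 32 byte
   aligned group) where all four consecutive bits are free in bmStack, otherwise
   it skips ahead to the next aligned group. */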
9169
9170
9171/**
9172 * Changes the variable to a stack variable.
9173 *
9174 * Currently this is s only possible to do the first time the variable is used,
9175 * switching later is can be implemented but not done.
9176 *
9177 * @param pReNative The recompiler state.
9178 * @param idxVar The variable.
9179 * @throws VERR_IEM_VAR_IPE_2
9180 */
9181static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9182{
9183 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9184 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9185 if (pVar->enmKind != kIemNativeVarKind_Stack)
9186 {
9187 /* We could in theory transition from immediate to stack as well, but it
9188 would involve the caller doing the work of storing the value on the stack. So,
9189 until that's required we only allow transitions from invalid. */
9190 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9191 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9192 pVar->enmKind = kIemNativeVarKind_Stack;
9193
9194 /* Note! We don't allocate a stack slot here, that's only done when a
9195 slot is actually needed to hold a variable value. */
9196 }
9197}
9198
9199
9200/**
9201 * Sets the variable to a constant (immediate) value.
9202 *
9203 * This does not require stack storage as we know the value and can always
9204 * reload it, unless of course it's referenced.
9205 *
9206 * @param pReNative The recompiler state.
9207 * @param idxVar The variable.
9208 * @param uValue The immediate value.
9209 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9210 */
9211static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
9212{
9213 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9214 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9215 if (pVar->enmKind != kIemNativeVarKind_Immediate)
9216 {
9217 /* Only simple transitions for now. */
9218 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9219 pVar->enmKind = kIemNativeVarKind_Immediate;
9220 }
9221 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9222
9223 pVar->u.uValue = uValue;
9224 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
9225 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
9226 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
9227}
9228
9229
9230/**
9231 * Sets the variable to a reference (pointer) to @a idxOtherVar.
9232 *
9233 * This does not require stack storage as we know the value and can always
9234 * reload it. Loading is postponed till needed.
9235 *
9236 * @param pReNative The recompiler state.
9237 * @param idxVar The variable. Unpacked.
9238 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
9239 *
9240 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9241 * @internal
9242 */
9243static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
9244{
9245 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
9246 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
9247
9248 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
9249 {
9250 /* Only simple transitions for now. */
9251 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
9252 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9253 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
9254 }
9255 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9256
9257 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
9258
9259 /* Update the other variable, ensure it's a stack variable. */
9260 /** @todo handle variables with const values... that'll go boom now. */
9261 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
9262 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9263}
9264
9265
9266/**
9267 * Sets the variable to a reference (pointer) to a guest register reference.
9268 *
9269 * This does not require stack storage as we know the value and can always
9270 * reload it. Loading is postponed till needed.
9271 *
9272 * @param pReNative The recompiler state.
9273 * @param idxVar The variable.
9274 * @param enmRegClass The class of guest register to reference.
9275 * @param idxReg The register within @a enmRegClass to reference.
9276 *
9277 * @throws VERR_IEM_VAR_IPE_2
9278 */
9279static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9280 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
9281{
9282 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9283 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9284
9285 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
9286 {
9287 /* Only simple transitions for now. */
9288 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9289 pVar->enmKind = kIemNativeVarKind_GstRegRef;
9290 }
9291 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9292
9293 pVar->u.GstRegRef.enmClass = enmRegClass;
9294 pVar->u.GstRegRef.idx = idxReg;
9295}
9296
9297
9298DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9299{
9300 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9301}
9302
9303
9304DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
9305{
9306 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9307
9308 /* Since we're using a generic uint64_t value type, we must truncate it if
9309 the variable is smaller, otherwise we may end up with too large a value when
9310 scaling up an imm8 w/ sign-extension.
9311
9312 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
9313 in the bios, bx=1) when running on arm, because clang expects 16-bit
9314 register parameters to have bits 16 and up set to zero. Instead of
9315 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
9316 CF value in the result. */
9317 switch (cbType)
9318 {
9319 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9320 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9321 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9322 }
9323 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9324 return idxVar;
9325}
9326
9327
9328DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
9329{
9330 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
9331 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
9332 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
9333 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
9334 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
9335 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9336
9337 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
9338 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
9339 return idxArgVar;
9340}
9341
9342
9343DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9344{
9345 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9346 /* Don't set to stack now, leave that to the first use as for instance
9347 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
9348 return idxVar;
9349}
9350
9351
9352DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
9353{
9354 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9355
9356 /* Since we're using a generic uint64_t value type, we must truncate it if
9357 the variable is smaller, otherwise we may end up with too large a value when
9358 scaling up an imm8 w/ sign-extension. */
9359 switch (cbType)
9360 {
9361 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9362 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9363 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9364 }
9365 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9366 return idxVar;
9367}
9368
9369
9370/**
9371 * Makes sure variable @a idxVar has a register assigned to it and that it stays
9372 * fixed till we call iemNativeVarRegisterRelease.
9373 *
9374 * @returns The host register number.
9375 * @param pReNative The recompiler state.
9376 * @param idxVar The variable.
9377 * @param poff Pointer to the instruction buffer offset.
9378 * In case a register needs to be freed up or the value
9379 * loaded off the stack.
9380 * @param fInitialized Set if the variable must already have been initialized.
9381 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
9382 * the case.
9383 * @param idxRegPref Preferred register number or UINT8_MAX.
9384 */
9385DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
9386 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
9387{
9388 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9389 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9390 Assert(pVar->cbVar <= 8);
9391 Assert(!pVar->fRegAcquired);
9392
9393 uint8_t idxReg = pVar->idxReg;
9394 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9395 {
9396 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
9397 && pVar->enmKind < kIemNativeVarKind_End);
9398 pVar->fRegAcquired = true;
9399 return idxReg;
9400 }
9401
9402 /*
9403 * If the kind of variable has not yet been set, default to 'stack'.
9404 */
9405 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
9406 && pVar->enmKind < kIemNativeVarKind_End);
9407 if (pVar->enmKind == kIemNativeVarKind_Invalid)
9408 iemNativeVarSetKindToStack(pReNative, idxVar);
9409
9410 /*
9411 * We have to allocate a register for the variable, even if it's a stack one,
9412 * as we don't know if there are modifications being made to it before it's
9413 * finalized (todo: analyze and insert hints about that?).
9414 *
9415 * If we can, we try to get the correct register for argument variables. This
9416 * is assuming that most argument variables are fetched as close as possible
9417 * to the actual call, so that there aren't any interfering hidden calls
9418 * (memory accesses, etc) in between.
9419 *
9420 * If we cannot, or it's a local variable, we make sure no argument registers
9421 * that will be used by this MC block will be allocated here, and we always
9422 * prefer non-volatile registers to avoid needing to spill stuff for internal
9423 * calls.
9424 */
9425 /** @todo Detect too early argument value fetches and warn about hidden
9426 * calls causing less optimal code to be generated in the python script. */
9427
9428 uint8_t const uArgNo = pVar->uArgNo;
9429 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
9430 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
9431 {
9432 idxReg = g_aidxIemNativeCallRegs[uArgNo];
9433 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9434 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
9435 }
9436 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
9437 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
9438 {
9439 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
9440 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
9441 & ~pReNative->Core.bmHstRegsWithGstShadow
9442 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
9443 & fNotArgsMask;
9444 if (fRegs)
9445 {
9446 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
9447 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
9448 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
9449 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
9450 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
9451 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9452 }
9453 else
9454 {
9455 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
9456 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
9457 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
9458 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9459 }
9460 }
9461 else
9462 {
9463 idxReg = idxRegPref;
9464 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9465 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
9466 }
9467 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9468 pVar->idxReg = idxReg;
9469
9470 /*
9471 * Load it off the stack if we've got a stack slot.
9472 */
9473 uint8_t const idxStackSlot = pVar->idxStackSlot;
9474 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9475 {
9476 Assert(fInitialized);
9477 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9478 switch (pVar->cbVar)
9479 {
9480 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
9481 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
9482 case 3: AssertFailed(); RT_FALL_THRU();
9483 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
9484 default: AssertFailed(); RT_FALL_THRU();
9485 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
9486 }
9487 }
9488 else
9489 {
9490 Assert(idxStackSlot == UINT8_MAX);
9491 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9492 }
9493 pVar->fRegAcquired = true;
9494 return idxReg;
9495}
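
/* Illustration only: the register selection policy above, reduced to plain C
   over a free-register mask (function and parameter names are local to this
   sketch, and it assumes fFreeRegs is non-zero):
   @code
        static uint8_t iemDemoPickVarReg(uint32_t fFreeRegs, uint32_t fVolatileRegs)
        {
            // Prefer callee-saved (non-volatile) registers so the variable can
            // survive helper calls without spilling; take the highest index as
            // both AMD64 and ARM64 keep a block of non-volatile registers there.
            uint32_t const fPreferred = fFreeRegs & ~fVolatileRegs;
            return (uint8_t)(ASMBitLastSetU32(fPreferred ? fPreferred : fFreeRegs) - 1);
        }
   @endcode
   This mirrors the fRegs / IEMNATIVE_CALL_VOLATILE_GREG_MASK expression in the
   middle branch; iemNativeRegAllocFindFree is the slow fallback when no such
   register is free. */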
9496
9497
9498/**
9499 * The value of variable @a idxVar will be written in full to the @a enmGstReg
9500 * guest register.
9501 *
9502 * This function makes sure there is a register for it and sets it to be the
9503 * current shadow copy of @a enmGstReg.
9504 *
9505 * @returns The host register number.
9506 * @param pReNative The recompiler state.
9507 * @param idxVar The variable.
9508 * @param enmGstReg The guest register this variable will be written to
9509 * after this call.
9510 * @param poff Pointer to the instruction buffer offset.
9511 * In case a register needs to be freed up or if the
9512 * variable content needs to be loaded off the stack.
9513 *
9514 * @note We DO NOT expect @a idxVar to be an argument variable, because we
9515 * can only be in the commit stage of an instruction when this
9516 * function is used.
9517 */
9518DECL_HIDDEN_THROW(uint8_t)
9519iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
9520{
9521 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9522 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9523 Assert(!pVar->fRegAcquired);
9524 AssertMsgStmt( pVar->cbVar <= 8
9525 && ( pVar->enmKind == kIemNativeVarKind_Immediate
9526 || pVar->enmKind == kIemNativeVarKind_Stack),
9527 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
9528 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
9529 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9530
9531 /*
9532 * This shouldn't ever be used for arguments, unless it's in a weird else
9533 * branch that doesn't do any calling and even then it's questionable.
9534 *
9535 * However, in case someone writes crazy wrong MC code and does register
9536 * updates before making calls, just use the regular register allocator to
9537 * ensure we get a register suitable for the intended argument number.
9538 */
9539 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
9540
9541 /*
9542 * If there is already a register for the variable, we transfer/set the
9543 * guest shadow copy assignment to it.
9544 */
9545 uint8_t idxReg = pVar->idxReg;
9546 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9547 {
9548 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
9549 {
9550 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
9551 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
9552 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
9553 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
9554 }
9555 else
9556 {
9557 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
9558 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
9559 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
9560 }
9561 /** @todo figure this one out. We need some way of making sure the register isn't
9562 * modified after this point, just in case we start writing crappy MC code. */
9563 pVar->enmGstReg = enmGstReg;
9564 pVar->fRegAcquired = true;
9565 return idxReg;
9566 }
9567 Assert(pVar->uArgNo == UINT8_MAX);
9568
9569 /*
9570 * Because this is supposed to be the commit stage, we just tag along with the
9571 * temporary register allocator and upgrade it to a variable register.
9572 */
9573 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
9574 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
9575 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
9576 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
9577 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
9578 pVar->idxReg = idxReg;
9579
9580 /*
9581 * Now we need to load the register value.
9582 */
9583 if (pVar->enmKind == kIemNativeVarKind_Immediate)
9584 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
9585 else
9586 {
9587 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9588 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9589 switch (pVar->cbVar)
9590 {
9591 case sizeof(uint64_t):
9592 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
9593 break;
9594 case sizeof(uint32_t):
9595 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
9596 break;
9597 case sizeof(uint16_t):
9598 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
9599 break;
9600 case sizeof(uint8_t):
9601 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
9602 break;
9603 default:
9604 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9605 }
9606 }
9607
9608 pVar->fRegAcquired = true;
9609 return idxReg;
9610}
9611
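/*
 * Usage sketch for iemNativeVarRegisterAcquireForGuestReg() (illustrative
 * only; idxValueVar and iGReg are hypothetical names for the value variable
 * and the destination GPR index, and the middle line is just a placeholder
 * for whatever emitter writes the value back to CPUMCTX):
 *
 * @code
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
 *                                                                       IEMNATIVEGSTREG_GPR(iGReg), &off);
 *      // ... emit the store of idxVarReg to pVCpu->cpum.GstCtx.aGRegs[iGReg] here ...
 *      iemNativeVarRegisterRelease(pReNative, idxValueVar);
 * @endcode
 */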
9612
9613/**
9614 * Sets the host register for @a idxVarRc to @a idxReg.
9615 *
9616 * The register must not be allocated. Any guest register shadowing will be
9617 * implicitly dropped by this call.
9618 *
9619 * The variable must not have any register associated with it (causes
9620 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
9621 * implied.
9622 *
9623 * @returns idxReg
9624 * @param pReNative The recompiler state.
9625 * @param idxVar The variable.
9626 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
9627 * @param off For recording in debug info.
9628 *
9629 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
9630 */
9631DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
9632{
9633 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9634 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9635 Assert(!pVar->fRegAcquired);
9636 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
9637 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
9638 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
9639
9640 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
9641 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9642
9643 iemNativeVarSetKindToStack(pReNative, idxVar);
9644 pVar->idxReg = idxReg;
9645
9646 return idxReg;
9647}
9648
9649
9650/**
9651 * Convenience wrapper for iemNativeVarRegisterSet() that also marks the register as acquired.
9652 */
9653DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9654 uint8_t idxReg, uint32_t *poff)
9655{
9656 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
9657 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
9658 return idxReg;
9659}
9660
9661
9662/**
9663 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
9664 *
9665 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
9666 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
9667 * requirement of flushing anything in volatile host registers when making a
9668 * call.
9669 *
9670 * @returns New @a off value.
9671 * @param pReNative The recompiler state.
9672 * @param off The code buffer position.
9673 * @param fHstRegsNotToSave Set of registers not to save & restore.
9674 */
9675DECL_HIDDEN_THROW(uint32_t)
9676iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9677{
9678 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9679 if (fHstRegs)
9680 {
9681 do
9682 {
9683 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9684 fHstRegs &= ~RT_BIT_32(idxHstReg);
9685
9686 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9687 {
9688 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9689 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9690 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9691 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9692 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9693 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9694 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9695 {
9696 case kIemNativeVarKind_Stack:
9697 {
9698 /* Temporarily spill the variable register. */
9699 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9700 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9701 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9702 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9703 continue;
9704 }
9705
9706 case kIemNativeVarKind_Immediate:
9707 case kIemNativeVarKind_VarRef:
9708 case kIemNativeVarKind_GstRegRef:
9709 /* It is weird to have any of these loaded at this point. */
9710 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9711 continue;
9712
9713 case kIemNativeVarKind_End:
9714 case kIemNativeVarKind_Invalid:
9715 break;
9716 }
9717 AssertFailed();
9718 }
9719 else
9720 {
9721 /*
9722 * Allocate a temporary stack slot and spill the register to it.
9723 */
9724 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9725 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
9726 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9727 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
9728 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
9729 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9730 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9731 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9732 }
9733 } while (fHstRegs);
9734 }
9735 return off;
9736}
9737
9738
9739/**
9740 * Emit code to restore volatile registers after a call to a helper.
9741 *
9742 * @returns New @a off value.
9743 * @param pReNative The recompiler state.
9744 * @param off The code buffer position.
9745 * @param fHstRegsNotToSave Set of registers not to save & restore.
9746 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
9747 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
9748 */
9749DECL_HIDDEN_THROW(uint32_t)
9750iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9751{
9752 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9753 if (fHstRegs)
9754 {
9755 do
9756 {
9757 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9758 fHstRegs &= ~RT_BIT_32(idxHstReg);
9759
9760 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9761 {
9762 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9763 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9764 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9765 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9766 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9767 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9768 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9769 {
9770 case kIemNativeVarKind_Stack:
9771 {
9772 /* Unspill the variable register. */
9773 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9774 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
9775 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9776 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9777 continue;
9778 }
9779
9780 case kIemNativeVarKind_Immediate:
9781 case kIemNativeVarKind_VarRef:
9782 case kIemNativeVarKind_GstRegRef:
9783 /* It is weird to have any of these loaded at this point. */
9784 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9785 continue;
9786
9787 case kIemNativeVarKind_End:
9788 case kIemNativeVarKind_Invalid:
9789 break;
9790 }
9791 AssertFailed();
9792 }
9793 else
9794 {
9795 /*
9796 * Restore from temporary stack slot.
9797 */
9798 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
9799 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
9800 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
9801 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
9802
9803 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9804 }
9805 } while (fHstRegs);
9806 }
9807 return off;
9808}
9809
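/*
 * Pairing sketch for the save/restore helpers above (illustrative only;
 * pfnHelper is a hypothetical helper function pointer):
 *
 * @code
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 * @endcode
 */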
9810
9811/**
9812 * Worker that frees the stack slots for variable @a idxVar if any allocated.
9813 *
9814 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
9815 *
9816 * ASSUMES that @a idxVar is valid and unpacked.
9817 */
9818DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9819{
9820 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
9821 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
9822 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9823 {
9824 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
9825 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
9826 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
9827 Assert(cSlots > 0);
9828 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
9829 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9830 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
9831 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
9832 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9833 }
9834 else
9835 Assert(idxStackSlot == UINT8_MAX);
9836}
9837
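/*
 * Worked example of the slot mask math above (illustrative): a 16 byte
 * variable gives cSlots = (16 + 7) / 8 = 2 and fAllocMask = 0x3, so with
 * idxStackSlot = 4 the bits cleared in bmStack are 0x3 << 4 = 0x30.
 */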
9838
9839/**
9840 * Worker that frees a single variable.
9841 *
9842 * ASSUMES that @a idxVar is valid and unpacked.
9843 */
9844DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9845{
9846 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
9847 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
9848 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
9849
9850 /* Free the host register first if any assigned. */
9851 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9852 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9853 {
9854 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9855 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9856 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9857 }
9858
9859 /* Free argument mapping. */
9860 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
9861 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
9862 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
9863
9864 /* Free the stack slots. */
9865 iemNativeVarFreeStackSlots(pReNative, idxVar);
9866
9867 /* Free the actual variable. */
9868 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9869 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9870}
9871
9872
9873/**
9874 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
9875 */
9876DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
9877{
9878 while (bmVars != 0)
9879 {
9880 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9881 bmVars &= ~RT_BIT_32(idxVar);
9882
9883#if 1 /** @todo optimize by simplifying this later... */
9884 iemNativeVarFreeOneWorker(pReNative, idxVar);
9885#else
9886 /* Only need to free the host register, the rest is done as bulk updates below. */
9887 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9888 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9889 {
9890 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9891 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9892 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9893 }
9894#endif
9895 }
9896#if 0 /** @todo optimize by simplifying this later... */
9897 pReNative->Core.bmVars = 0;
9898 pReNative->Core.bmStack = 0;
9899 pReNative->Core.u64ArgVars = UINT64_MAX;
9900#endif
9901}
9902
9903
9904/**
9905 * This is called by IEM_MC_END() to clean up all variables.
9906 */
9907DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
9908{
9909 uint32_t const bmVars = pReNative->Core.bmVars;
9910 if (bmVars != 0)
9911 iemNativeVarFreeAllSlow(pReNative, bmVars);
9912 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9913 Assert(pReNative->Core.bmStack == 0);
9914}
9915
9916
9917#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
9918
9919/**
9920 * This is called by IEM_MC_FREE_LOCAL.
9921 */
9922DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9923{
9924 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9925 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
9926 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9927}
9928
9929
9930#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
9931
9932/**
9933 * This is called by IEM_MC_FREE_ARG.
9934 */
9935DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9936{
9937 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9938 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
9939 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9940}
9941
9942
9943#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
9944
9945/**
9946 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
9947 */
9948DECL_INLINE_THROW(uint32_t)
9949iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
9950{
9951 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
9952 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
9953 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9954 Assert( pVarDst->cbVar == sizeof(uint16_t)
9955 || pVarDst->cbVar == sizeof(uint32_t));
9956
9957 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
9958 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
9959 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
9960 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
9961 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9962
9963 Assert(pVarDst->cbVar < pVarSrc->cbVar);
9964
9965 /*
9966 * Special case for immediates.
9967 */
9968 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
9969 {
9970 switch (pVarDst->cbVar)
9971 {
9972 case sizeof(uint16_t):
9973 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
9974 break;
9975 case sizeof(uint32_t):
9976 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
9977 break;
9978 default: AssertFailed(); break;
9979 }
9980 }
9981 else
9982 {
9983 /*
9984 * The generic solution for now.
9985 */
9986 /** @todo optimize this by having the python script make sure the source
9987 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
9988 * statement. Then we could just transfer the register assignments. */
9989 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
9990 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
9991 switch (pVarDst->cbVar)
9992 {
9993 case sizeof(uint16_t):
9994 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
9995 break;
9996 case sizeof(uint32_t):
9997 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
9998 break;
9999 default: AssertFailed(); break;
10000 }
10001 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
10002 iemNativeVarRegisterRelease(pReNative, idxVarDst);
10003 }
10004 return off;
10005}
10006
10007
10008
10009/*********************************************************************************************************************************
10010* Emitters for IEM_MC_CALL_CIMPL_XXX *
10011*********************************************************************************************************************************/
10012
10013/**
10014 * Emits code to load a reference to the given guest register into @a idxGprDst.
10015 */
10016DECL_INLINE_THROW(uint32_t)
10017iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
10018 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
10019{
10020#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
10021 /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
10022#endif
10023
10024 /*
10025 * Get the offset relative to the CPUMCTX structure.
10026 */
10027 uint32_t offCpumCtx;
10028 switch (enmClass)
10029 {
10030 case kIemNativeGstRegRef_Gpr:
10031 Assert(idxRegInClass < 16);
10032 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
10033 break;
10034
10035 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
10036 Assert(idxRegInClass < 4);
10037 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
10038 break;
10039
10040 case kIemNativeGstRegRef_EFlags:
10041 Assert(idxRegInClass == 0);
10042 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
10043 break;
10044
10045 case kIemNativeGstRegRef_MxCsr:
10046 Assert(idxRegInClass == 0);
10047 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
10048 break;
10049
10050 case kIemNativeGstRegRef_FpuReg:
10051 Assert(idxRegInClass < 8);
10052 AssertFailed(); /** @todo what kind of indexing? */
10053 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
10054 break;
10055
10056 case kIemNativeGstRegRef_MReg:
10057 Assert(idxRegInClass < 8);
10058 AssertFailed(); /** @todo what kind of indexing? */
10059 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
10060 break;
10061
10062 case kIemNativeGstRegRef_XReg:
10063 Assert(idxRegInClass < 16);
10064 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
10065 break;
10066
10067 default:
10068 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
10069 }
10070
10071 /*
10072 * Load the value into the destination register.
10073 */
10074#ifdef RT_ARCH_AMD64
10075 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
10076
10077#elif defined(RT_ARCH_ARM64)
10078 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10079 Assert(offCpumCtx < 4096);
10080 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
10081
10082#else
10083# error "Port me!"
10084#endif
10085
10086 return off;
10087}
10088
10089
10090/**
10091 * Common code for CIMPL and AIMPL calls.
10092 *
10093 * These are calls that use argument variables and such. They should not be
10094 * confused with internal calls required to implement an MC operation,
10095 * like a TLB load and similar.
10096 *
10097 * Upon return all that is left to do is to load any hidden arguments and
10098 * perform the call. All argument variables are freed.
10099 *
10100 * @returns New code buffer offset; throws VBox status code on error.
10101 * @param pReNative The native recompile state.
10102 * @param off The code buffer offset.
10103 * @param cArgs The total number of arguments (includes hidden
10104 * count).
10105 * @param cHiddenArgs The number of hidden arguments. The hidden
10106 * arguments must not have any variable declared for
10107 * them, whereas all the regular arguments must
10108 * (tstIEMCheckMc ensures this).
10109 */
10110DECL_HIDDEN_THROW(uint32_t)
10111iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
10112{
10113#ifdef VBOX_STRICT
10114 /*
10115 * Assert sanity.
10116 */
10117 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
10118 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
10119 for (unsigned i = 0; i < cHiddenArgs; i++)
10120 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
10121 for (unsigned i = cHiddenArgs; i < cArgs; i++)
10122 {
10123 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
10124 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
10125 }
10126 iemNativeRegAssertSanity(pReNative);
10127#endif
10128
10129 /* We don't know what the called function makes use of, so flush any pending register writes. */
10130 off = iemNativeRegFlushPendingWrites(pReNative, off);
10131
10132 /*
10133 * Before we do anything else, go over variables that are referenced and
10134 * make sure they are not in a register.
10135 */
10136 uint32_t bmVars = pReNative->Core.bmVars;
10137 if (bmVars)
10138 {
10139 do
10140 {
10141 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
10142 bmVars &= ~RT_BIT_32(idxVar);
10143
10144 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
10145 {
10146 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
10147 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
10148 {
10149 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
10150 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
10151 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
10152 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
10153 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
10154
10155 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
10156 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
10157 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
10158 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
10159 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
10160 }
10161 }
10162 } while (bmVars != 0);
10163#if 0 //def VBOX_STRICT
10164 iemNativeRegAssertSanity(pReNative);
10165#endif
10166 }
10167
10168 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
10169
10170 /*
10171 * First, go over the host registers that will be used for arguments and make
10172 * sure they either hold the desired argument or are free.
10173 */
10174 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
10175 {
10176 for (uint32_t i = 0; i < cRegArgs; i++)
10177 {
10178 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10179 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10180 {
10181 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
10182 {
10183 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
10184 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
10185 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
10186 Assert(pVar->idxReg == idxArgReg);
10187 uint8_t const uArgNo = pVar->uArgNo;
10188 if (uArgNo == i)
10189 { /* perfect */ }
10190 /* The variable allocator logic should make sure this is impossible,
10191 except for when the return register is used as a parameter (ARM,
10192 but not x86). */
10193#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
10194 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
10195 {
10196# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10197# error "Implement this"
10198# endif
10199 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
10200 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
10201 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
10202 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10203 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
10204 }
10205#endif
10206 else
10207 {
10208 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10209
10210 if (pVar->enmKind == kIemNativeVarKind_Stack)
10211 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
10212 else
10213 {
10214 /* just free it, can be reloaded if used again */
10215 pVar->idxReg = UINT8_MAX;
10216 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
10217 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
10218 }
10219 }
10220 }
10221 else
10222 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
10223 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
10224 }
10225 }
10226#if 0 //def VBOX_STRICT
10227 iemNativeRegAssertSanity(pReNative);
10228#endif
10229 }
10230
10231 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
10232
10233#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10234 /*
10235 * If there are any stack arguments, make sure they are in their place as well.
10236 *
10237 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
10238 * the caller) will be loading it later and it must be free (see the first loop).
10239 */
10240 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
10241 {
10242 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
10243 {
10244 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10245 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
10246 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10247 {
10248 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
10249 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
10250 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
10251 pVar->idxReg = UINT8_MAX;
10252 }
10253 else
10254 {
10255 /* Use ARG0 as temp for stuff we need registers for. */
10256 switch (pVar->enmKind)
10257 {
10258 case kIemNativeVarKind_Stack:
10259 {
10260 uint8_t const idxStackSlot = pVar->idxStackSlot;
10261 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10262 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
10263 iemNativeStackCalcBpDisp(idxStackSlot));
10264 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10265 continue;
10266 }
10267
10268 case kIemNativeVarKind_Immediate:
10269 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
10270 continue;
10271
10272 case kIemNativeVarKind_VarRef:
10273 {
10274 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10275 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10276 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10277 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10278 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10279 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10280 {
10281 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10282 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10283 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10284 }
10285 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10286 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10287 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
10288 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10289 continue;
10290 }
10291
10292 case kIemNativeVarKind_GstRegRef:
10293 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
10294 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10295 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10296 continue;
10297
10298 case kIemNativeVarKind_Invalid:
10299 case kIemNativeVarKind_End:
10300 break;
10301 }
10302 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10303 }
10304 }
10305# if 0 //def VBOX_STRICT
10306 iemNativeRegAssertSanity(pReNative);
10307# endif
10308 }
10309#else
10310 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
10311#endif
10312
10313 /*
10314 * Make sure the argument variables are loaded into their respective registers.
10315 *
10316 * We can optimize this by ASSUMING that any register allocations are for
10317 * registers that have already been loaded and are ready. The previous step
10318 * saw to that.
10319 */
10320 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
10321 {
10322 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10323 {
10324 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10325 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10326 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
10327 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
10328 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
10329 else
10330 {
10331 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10332 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10333 {
10334 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
10335 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
10336 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
10337 | RT_BIT_32(idxArgReg);
10338 pVar->idxReg = idxArgReg;
10339 }
10340 else
10341 {
10342 /* Use ARG0 as temp for stuff we need registers for. */
10343 switch (pVar->enmKind)
10344 {
10345 case kIemNativeVarKind_Stack:
10346 {
10347 uint8_t const idxStackSlot = pVar->idxStackSlot;
10348 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10349 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
10350 continue;
10351 }
10352
10353 case kIemNativeVarKind_Immediate:
10354 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
10355 continue;
10356
10357 case kIemNativeVarKind_VarRef:
10358 {
10359 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10360 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10361 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
10362 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10363 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10364 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10365 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10366 {
10367 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10368 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10369 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10370 }
10371 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10372 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10373 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
10374 continue;
10375 }
10376
10377 case kIemNativeVarKind_GstRegRef:
10378 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
10379 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10380 continue;
10381
10382 case kIemNativeVarKind_Invalid:
10383 case kIemNativeVarKind_End:
10384 break;
10385 }
10386 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10387 }
10388 }
10389 }
10390#if 0 //def VBOX_STRICT
10391 iemNativeRegAssertSanity(pReNative);
10392#endif
10393 }
10394#ifdef VBOX_STRICT
10395 else
10396 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10397 {
10398 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
10399 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
10400 }
10401#endif
10402
10403 /*
10404 * Free all argument variables (simplified).
10405 * Their lifetime always expires with the call they are for.
10406 */
10407 /** @todo Make the python script check that arguments aren't used after
10408 * IEM_MC_CALL_XXXX. */
10409 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
10410 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
10411 * an argument value. There is also some FPU stuff. */
10412 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
10413 {
10414 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
10415 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
10416
10417 /* no need to free registers: */
10418 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
10419 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
10420 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
10421 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
10422 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
10423 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
10424
10425 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
10426 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
10427 iemNativeVarFreeStackSlots(pReNative, idxVar);
10428 }
10429 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
10430
10431 /*
10432 * Flush volatile registers as we make the call.
10433 */
10434 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
10435
10436 return off;
10437}
10438
10439
10440/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
10441DECL_HIDDEN_THROW(uint32_t)
10442iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
10443 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
10444
10445{
10446 /*
10447 * Do all the call setup and cleanup.
10448 */
10449 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
10450
10451 /*
10452 * Load the two or three hidden arguments.
10453 */
10454#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10455 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
10456 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10457 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
10458#else
10459 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10460 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
10461#endif
10462
10463 /*
10464 * Make the call and check the return code.
10465 *
10466 * Shadow PC copies are always flushed here; other stuff depends on flags.
10467 * Segment and general purpose registers are explicitly flushed via the
10468 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
10469 * macros.
10470 */
10471 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
10472#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10473 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
10474#endif
10475 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
10476 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
10477 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
10478 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
10479
10480 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
10481}
10482
10483
10484#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
10485 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
10486
10487/** Emits code for IEM_MC_CALL_CIMPL_1. */
10488DECL_INLINE_THROW(uint32_t)
10489iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10490 uintptr_t pfnCImpl, uint8_t idxArg0)
10491{
10492 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10493 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
10494}
10495
10496
10497#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
10498 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
10499
10500/** Emits code for IEM_MC_CALL_CIMPL_2. */
10501DECL_INLINE_THROW(uint32_t)
10502iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10503 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
10504{
10505 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10506 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10507 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
10508}
10509
10510
10511#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
10512 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10513 (uintptr_t)a_pfnCImpl, a0, a1, a2)
10514
10515/** Emits code for IEM_MC_CALL_CIMPL_3. */
10516DECL_INLINE_THROW(uint32_t)
10517iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10518 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10519{
10520 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10521 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10522 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10523 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
10524}
10525
10526
10527#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
10528 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10529 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
10530
10531/** Emits code for IEM_MC_CALL_CIMPL_4. */
10532DECL_INLINE_THROW(uint32_t)
10533iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10534 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10535{
10536 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10537 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10538 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10539 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10540 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
10541}
10542
10543
10544#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
10545 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10546 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
10547
10548/** Emits code for IEM_MC_CALL_CIMPL_5. */
10549DECL_INLINE_THROW(uint32_t)
10550iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10551 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
10552{
10553 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10554 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10555 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10556 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10557 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
10558 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
10559}
10560
10561
10562/** Recompiler debugging: Flush guest register shadow copies. */
10563#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
10564
10565
10566
10567/*********************************************************************************************************************************
10568* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
10569*********************************************************************************************************************************/
10570
10571/**
10572 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
10573 */
10574DECL_INLINE_THROW(uint32_t)
10575iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10576 uintptr_t pfnAImpl, uint8_t cArgs)
10577{
10578 if (idxVarRc != UINT8_MAX)
10579 {
10580 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
10581 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
10582 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
10583 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
10584 }
10585
10586 /*
10587 * Do all the call setup and cleanup.
10588 */
10589 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
10590
10591 /*
10592 * Make the call and update the return code variable if we've got one.
10593 */
10594 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10595 if (idxVarRc != UINT8_MAX)
10596 {
10597 off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
10598 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
10599 }
10600
10601 return off;
10602}
10603
10604
10605
10606#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
10607 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
10608
10609#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
10610 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
10611
10612/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
10613DECL_INLINE_THROW(uint32_t)
10614iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
10615{
10616 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
10617}
10618
10619
10620#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
10621 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
10622
10623#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
10624 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
10625
10626/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
10627DECL_INLINE_THROW(uint32_t)
10628iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
10629{
10630 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10631 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
10632}
10633
10634
10635#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
10636 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
10637
10638#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
10639 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
10640
10641/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
10642DECL_INLINE_THROW(uint32_t)
10643iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10644 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10645{
10646 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10647 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10648 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
10649}
10650
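/*
 * Expansion sketch (illustrative; iemAImpl_something_u32 and the variable
 * names are hypothetical): in a recompiled MC block
 * IEM_MC_CALL_AIMPL_2(rcLocal, iemAImpl_something_u32, pu32Dst, u32Src)
 * simply becomes
 *
 * @code
 *      off = iemNativeEmitCallAImpl2(pReNative, off, rcLocal, (uintptr_t)(iemAImpl_something_u32), pu32Dst, u32Src);
 * @endcode
 *
 * where rcLocal, pu32Dst and u32Src are the (packed) variable indices created
 * by the IEM_MC variable/argument statements.
 */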
10651
10652#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
10653 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
10654
10655#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
10656 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
10657
10658/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
10659DECL_INLINE_THROW(uint32_t)
10660iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10661 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10662{
10663 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10664 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10665 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10666 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
10667}
10668
10669
10670#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
10671 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10672
10673#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
10674 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10675
10676/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
10677DECL_INLINE_THROW(uint32_t)
10678iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10679 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10680{
10681 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10682 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10683 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10684 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
10685 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
10686}
10687
10688
10689
10690/*********************************************************************************************************************************
10691* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
10692*********************************************************************************************************************************/
10693
10694#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
10695 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
10696
10697#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10698 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
10699
10700#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10701 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
10702
10703#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10704 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
10705
10706
10707/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
10708 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
10709DECL_INLINE_THROW(uint32_t)
10710iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
10711{
10712 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10713 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10714 Assert(iGRegEx < 20);
10715
10716 /* Same discussion as in iemNativeEmitFetchGregU16 */
10717 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10718 kIemNativeGstRegUse_ReadOnly);
10719
10720 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10721 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10722
10723 /* The value is zero-extended to the full 64-bit host register width. */
10724 if (iGRegEx < 16)
10725 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10726 else
10727 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10728
10729 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10730 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10731 return off;
10732}
10733
10734
10735#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10736 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
10737
10738#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10739 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
10740
10741#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10742 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
10743
10744/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
10745DECL_INLINE_THROW(uint32_t)
10746iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
10747{
10748 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10749 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10750 Assert(iGRegEx < 20);
10751
10752 /* Same discussion as in iemNativeEmitFetchGregU16 */
10753 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10754 kIemNativeGstRegUse_ReadOnly);
10755
10756 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10757 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10758
10759 if (iGRegEx < 16)
10760 {
10761 switch (cbSignExtended)
10762 {
10763 case sizeof(uint16_t):
10764 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10765 break;
10766 case sizeof(uint32_t):
10767 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10768 break;
10769 case sizeof(uint64_t):
10770 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10771 break;
10772 default: AssertFailed(); break;
10773 }
10774 }
10775 else
10776 {
10777 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10778 switch (cbSignExtended)
10779 {
10780 case sizeof(uint16_t):
10781 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10782 break;
10783 case sizeof(uint32_t):
10784 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10785 break;
10786 case sizeof(uint64_t):
10787 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10788 break;
10789 default: AssertFailed(); break;
10790 }
10791 }
10792
10793 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10794 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10795 return off;
10796}
10797
10798
10799
10800#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
10801 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
10802
10803#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
10804 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10805
10806#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
10807 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10808
10809/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
10810DECL_INLINE_THROW(uint32_t)
10811iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10812{
10813 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10814 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10815 Assert(iGReg < 16);
10816
10817 /*
10818      * We can either just load the low 16 bits of the GPR into a host register
10819 * for the variable, or we can do so via a shadow copy host register. The
10820 * latter will avoid having to reload it if it's being stored later, but
10821 * will waste a host register if it isn't touched again. Since we don't
10822      * know what's going to happen, we choose the latter for now.
10823 */
10824 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10825 kIemNativeGstRegUse_ReadOnly);
10826
10827 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10828 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10829 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10830 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10831
10832 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10833 return off;
10834}
10835
10836
10837#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
10838 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10839
10840#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
10841 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10842
10843/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
10844DECL_INLINE_THROW(uint32_t)
10845iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
10846{
10847 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10848 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10849 Assert(iGReg < 16);
10850
10851 /*
10852      * We can either just load the low 16 bits of the GPR into a host register
10853 * for the variable, or we can do so via a shadow copy host register. The
10854 * latter will avoid having to reload it if it's being stored later, but
10855 * will waste a host register if it isn't touched again. Since we don't
10856      * know what's going to happen, we choose the latter for now.
10857 */
10858 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10859 kIemNativeGstRegUse_ReadOnly);
10860
10861 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10862 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10863 if (cbSignExtended == sizeof(uint32_t))
10864 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10865 else
10866 {
10867 Assert(cbSignExtended == sizeof(uint64_t));
10868 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10869 }
10870 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10871
10872 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10873 return off;
10874}
10875
10876
10877#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
10878 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
10879
10880#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
10881 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
10882
10883/** Emits code for IEM_MC_FETCH_GREG_U32. */
10884DECL_INLINE_THROW(uint32_t)
10885iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10886{
10887 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10888 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10889 Assert(iGReg < 16);
10890
10891 /*
10892      * We can either just load the low 32 bits of the GPR into a host register
10893 * for the variable, or we can do so via a shadow copy host register. The
10894 * latter will avoid having to reload it if it's being stored later, but
10895 * will waste a host register if it isn't touched again. Since we don't
10896      * know what's going to happen, we choose the latter for now.
10897 */
10898 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10899 kIemNativeGstRegUse_ReadOnly);
10900
10901 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10902 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10903 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10904 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10905
10906 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10907 return off;
10908}
10909
10910
10911#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
10912 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
10913
10914 /** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
10915DECL_INLINE_THROW(uint32_t)
10916iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10917{
10918 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10919 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10920 Assert(iGReg < 16);
10921
10922 /*
10923      * We can either just load the low 32 bits of the GPR into a host register
10924 * for the variable, or we can do so via a shadow copy host register. The
10925 * latter will avoid having to reload it if it's being stored later, but
10926 * will waste a host register if it isn't touched again. Since we don't
10927      * know what's going to happen, we choose the latter for now.
10928 */
10929 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10930 kIemNativeGstRegUse_ReadOnly);
10931
10932 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10933 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10934 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10935 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10936
10937 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10938 return off;
10939}
10940
10941
10942#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
10943 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10944
10945#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
10946 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10947
10948/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
10949 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
10950DECL_INLINE_THROW(uint32_t)
10951iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10952{
10953 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10954 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10955 Assert(iGReg < 16);
10956
10957 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10958 kIemNativeGstRegUse_ReadOnly);
10959
10960 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10961 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10962 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
10963 /** @todo name the register a shadow one already? */
10964 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10965
10966 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10967 return off;
10968}
10969
10970
10971
10972/*********************************************************************************************************************************
10973* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
10974*********************************************************************************************************************************/
10975
10976#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
10977 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
10978
10979/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
10980DECL_INLINE_THROW(uint32_t)
10981iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
10982{
10983 Assert(iGRegEx < 20);
10984 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10985 kIemNativeGstRegUse_ForUpdate);
10986#ifdef RT_ARCH_AMD64
10987 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
10988
10989 /* To the lowest byte of the register: mov r8, imm8 */
10990 if (iGRegEx < 16)
10991 {
10992 if (idxGstTmpReg >= 8)
10993 pbCodeBuf[off++] = X86_OP_REX_B;
10994 else if (idxGstTmpReg >= 4)
10995 pbCodeBuf[off++] = X86_OP_REX;
10996 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
10997 pbCodeBuf[off++] = u8Value;
10998 }
10999      /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
11000 else if (idxGstTmpReg < 4)
11001 {
11002 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
11003 pbCodeBuf[off++] = u8Value;
11004 }
11005 else
11006 {
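          /* Host registers 4 and up have no directly addressable high-byte form,
             so rotate bits 15:8 down into the low byte, patch it, and rotate back. */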
11007 /* ror reg64, 8 */
11008 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11009 pbCodeBuf[off++] = 0xc1;
11010 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11011 pbCodeBuf[off++] = 8;
11012
11013 /* mov reg8, imm8 */
11014 if (idxGstTmpReg >= 8)
11015 pbCodeBuf[off++] = X86_OP_REX_B;
11016 else if (idxGstTmpReg >= 4)
11017 pbCodeBuf[off++] = X86_OP_REX;
11018 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
11019 pbCodeBuf[off++] = u8Value;
11020
11021 /* rol reg64, 8 */
11022 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11023 pbCodeBuf[off++] = 0xc1;
11024 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11025 pbCodeBuf[off++] = 8;
11026 }
11027
11028#elif defined(RT_ARCH_ARM64)
11029 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
11030 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11031 if (iGRegEx < 16)
11032 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
11033 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
11034 else
11035 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
11036 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
11037 iemNativeRegFreeTmp(pReNative, idxImmReg);
11038
11039#else
11040# error "Port me!"
11041#endif
11042
11043 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11044
11045 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11046
11047 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11048 return off;
11049}
11050
11051
11052#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
11053 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
11054
11055/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
11056DECL_INLINE_THROW(uint32_t)
11057iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
11058{
11059 Assert(iGRegEx < 20);
11060 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11061
11062 /*
11063 * If it's a constant value (unlikely) we treat this as a
11064 * IEM_MC_STORE_GREG_U8_CONST statement.
11065 */
11066 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11067 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11068 { /* likely */ }
11069 else
11070 {
11071 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11072 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11073 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
11074 }
11075
11076 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
11077 kIemNativeGstRegUse_ForUpdate);
11078 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11079
11080#ifdef RT_ARCH_AMD64
11081 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
11082 if (iGRegEx < 16)
11083 {
11084 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
11085 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11086 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11087 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11088 pbCodeBuf[off++] = X86_OP_REX;
11089 pbCodeBuf[off++] = 0x8a;
11090 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11091 }
11092      /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
11093 else if (idxGstTmpReg < 4 && idxVarReg < 4)
11094 {
11095 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
11096 pbCodeBuf[off++] = 0x8a;
11097 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
11098 }
11099 else
11100 {
11101 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
11102
11103 /* ror reg64, 8 */
11104 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11105 pbCodeBuf[off++] = 0xc1;
11106 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11107 pbCodeBuf[off++] = 8;
11108
11109 /* mov reg8, reg8(r/m) */
11110 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11111 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11112 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11113 pbCodeBuf[off++] = X86_OP_REX;
11114 pbCodeBuf[off++] = 0x8a;
11115 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11116
11117 /* rol reg64, 8 */
11118 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11119 pbCodeBuf[off++] = 0xc1;
11120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11121 pbCodeBuf[off++] = 8;
11122 }
11123
11124#elif defined(RT_ARCH_ARM64)
11125 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
11126 or
11127 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
11128 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11129 if (iGRegEx < 16)
11130 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
11131 else
11132 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
11133
11134#else
11135# error "Port me!"
11136#endif
11137 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11138
11139 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11140
11141 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11142 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11143 return off;
11144}
11145
11146
11147
11148#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
11149 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
11150
11151 /** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
11152DECL_INLINE_THROW(uint32_t)
11153iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
11154{
11155 Assert(iGReg < 16);
11156 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11157 kIemNativeGstRegUse_ForUpdate);
11158#ifdef RT_ARCH_AMD64
11159 /* mov reg16, imm16 */
11160 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
11161 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11162 if (idxGstTmpReg >= 8)
11163 pbCodeBuf[off++] = X86_OP_REX_B;
11164 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
11165 pbCodeBuf[off++] = RT_BYTE1(uValue);
11166 pbCodeBuf[off++] = RT_BYTE2(uValue);
11167
11168#elif defined(RT_ARCH_ARM64)
11169 /* movk xdst, #uValue, lsl #0 */
11170 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11171 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
11172
11173#else
11174# error "Port me!"
11175#endif
11176
11177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11178
11179 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11180 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11181 return off;
11182}
11183
11184
11185#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
11186 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
11187
11188/** Emits code for IEM_MC_STORE_GREG_U16. */
11189DECL_INLINE_THROW(uint32_t)
11190iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11191{
11192 Assert(iGReg < 16);
11193 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11194
11195 /*
11196 * If it's a constant value (unlikely) we treat this as a
11197 * IEM_MC_STORE_GREG_U16_CONST statement.
11198 */
11199 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11200 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11201 { /* likely */ }
11202 else
11203 {
11204 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11205 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11206 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
11207 }
11208
11209 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11210 kIemNativeGstRegUse_ForUpdate);
11211
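          /* On AMD64 the low 16 bits can be moved in directly from either the variable's
             host register or its stack slot; on ARM64 the value must be in a register
             for the bfi merge below. */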
11212#ifdef RT_ARCH_AMD64
11213 /* mov reg16, reg16 or [mem16] */
11214 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
11215 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11216 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
11217 {
11218 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
11219 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
11220 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
11221 pbCodeBuf[off++] = 0x8b;
11222 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
11223 }
11224 else
11225 {
11226 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
11227 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
11228 if (idxGstTmpReg >= 8)
11229 pbCodeBuf[off++] = X86_OP_REX_R;
11230 pbCodeBuf[off++] = 0x8b;
11231 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11232 }
11233
11234#elif defined(RT_ARCH_ARM64)
11235 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
11236 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11237 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11238 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
11239 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11240
11241#else
11242# error "Port me!"
11243#endif
11244
11245 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11246
11247 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11248 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11249 return off;
11250}
11251
11252
11253#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
11254 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
11255
11256/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
11257DECL_INLINE_THROW(uint32_t)
11258iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
11259{
11260 Assert(iGReg < 16);
11261 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11262 kIemNativeGstRegUse_ForFullWrite);
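          /* Loading the 32-bit constant zero-extends it to 64 bits, matching the
             architectural clearing of bits 63:32 on 32-bit GPR writes. */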
11263 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11264 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11265 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11266 return off;
11267}
11268
11269
11270#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
11271 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
11272
11273/** Emits code for IEM_MC_STORE_GREG_U32. */
11274DECL_INLINE_THROW(uint32_t)
11275iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11276{
11277 Assert(iGReg < 16);
11278 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11279
11280 /*
11281 * If it's a constant value (unlikely) we treat this as a
11282 * IEM_MC_STORE_GREG_U32_CONST statement.
11283 */
11284 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11285 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11286 { /* likely */ }
11287 else
11288 {
11289 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11290 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11291 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
11292 }
11293
11294 /*
11295      * For the rest we allocate a guest register for the variable and write
11296      * it to the CPUMCTX structure.
11297 */
11298 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11299 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11300#ifdef VBOX_STRICT
11301 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
11302#endif
11303 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11304 return off;
11305}
11306
11307
11308#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
11309 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
11310
11311/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
11312DECL_INLINE_THROW(uint32_t)
11313iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
11314{
11315 Assert(iGReg < 16);
11316 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11317 kIemNativeGstRegUse_ForFullWrite);
11318 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11319 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11320 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11321 return off;
11322}
11323
11324
11325#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
11326 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
11327
11328/** Emits code for IEM_MC_STORE_GREG_U64. */
11329DECL_INLINE_THROW(uint32_t)
11330iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11331{
11332 Assert(iGReg < 16);
11333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11334
11335 /*
11336 * If it's a constant value (unlikely) we treat this as a
11337 * IEM_MC_STORE_GREG_U64_CONST statement.
11338 */
11339 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11340 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11341 { /* likely */ }
11342 else
11343 {
11344 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11345 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11346 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
11347 }
11348
11349 /*
11350      * For the rest we allocate a guest register for the variable and write
11351      * it to the CPUMCTX structure.
11352 */
11353 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11354 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11355 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11356 return off;
11357}
11358
11359
11360#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
11361 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
11362
11363/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
11364DECL_INLINE_THROW(uint32_t)
11365iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
11366{
11367 Assert(iGReg < 16);
11368 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11369 kIemNativeGstRegUse_ForUpdate);
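          /* A 32-bit move of the register onto itself zero-extends it, clearing bits 63:32. */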
11370 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
11371 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11372 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11373 return off;
11374}
11375
11376
11377/*********************************************************************************************************************************
11378* General purpose register manipulation (add, sub). *
11379*********************************************************************************************************************************/
11380
11381#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11382 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11383
11384/** Emits code for IEM_MC_ADD_GREG_U16. */
11385DECL_INLINE_THROW(uint32_t)
11386iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
11387{
11388 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11389 kIemNativeGstRegUse_ForUpdate);
11390
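          /* Only bits 15:0 of the guest register may change: AMD64 uses an operand-size
             prefixed inc/add, while ARM64 adds into a temporary and merges the low
             16 bits back with bfi. */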
11391#ifdef RT_ARCH_AMD64
11392 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11393 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11394 if (idxGstTmpReg >= 8)
11395 pbCodeBuf[off++] = X86_OP_REX_B;
11396 if (uAddend == 1)
11397 {
11398 pbCodeBuf[off++] = 0xff; /* inc */
11399 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11400 }
11401 else
11402 {
11403 pbCodeBuf[off++] = 0x81;
11404 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11405 pbCodeBuf[off++] = uAddend;
11406 pbCodeBuf[off++] = 0;
11407 }
11408
11409#else
11410 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11411 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11412
11413      /* add tmp, gstgrp, uAddend */
11414 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
11415
11416      /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
11417 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11418
11419 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11420#endif
11421
11422 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11423
11424 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11425
11426 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11427 return off;
11428}
11429
11430
11431#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
11432 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11433
11434#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
11435 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11436
11437/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
11438DECL_INLINE_THROW(uint32_t)
11439iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
11440{
11441 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11442 kIemNativeGstRegUse_ForUpdate);
11443
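          /* Pick the shortest encoding: inc for an addend of one, the sign-extended
             imm8 form for addends below 128, and the imm32 form otherwise. */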
11444#ifdef RT_ARCH_AMD64
11445 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11446 if (f64Bit)
11447 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11448 else if (idxGstTmpReg >= 8)
11449 pbCodeBuf[off++] = X86_OP_REX_B;
11450 if (uAddend == 1)
11451 {
11452 pbCodeBuf[off++] = 0xff; /* inc */
11453 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11454 }
11455 else if (uAddend < 128)
11456 {
11457 pbCodeBuf[off++] = 0x83; /* add */
11458 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11459 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11460 }
11461 else
11462 {
11463 pbCodeBuf[off++] = 0x81; /* add */
11464 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11465 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11466 pbCodeBuf[off++] = 0;
11467 pbCodeBuf[off++] = 0;
11468 pbCodeBuf[off++] = 0;
11469 }
11470
11471#else
11472      /* add gstgrp, gstgrp, uAddend */
11473 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11474 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
11475
11476#endif
11477
11478 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11479
11480 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11481
11482 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11483 return off;
11484}
11485
11486
11487
11488#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11489 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11490
11491/** Emits code for IEM_MC_SUB_GREG_U16. */
11492DECL_INLINE_THROW(uint32_t)
11493iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
11494{
11495 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11496 kIemNativeGstRegUse_ForUpdate);
11497
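          /* Same low 16-bit merge approach as iemNativeEmitAddGregU16, just subtracting. */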
11498#ifdef RT_ARCH_AMD64
11499 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11500 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11501 if (idxGstTmpReg >= 8)
11502 pbCodeBuf[off++] = X86_OP_REX_B;
11503 if (uSubtrahend == 1)
11504 {
11505 pbCodeBuf[off++] = 0xff; /* dec */
11506 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11507 }
11508 else
11509 {
11510 pbCodeBuf[off++] = 0x81;
11511 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11512 pbCodeBuf[off++] = uSubtrahend;
11513 pbCodeBuf[off++] = 0;
11514 }
11515
11516#else
11517 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11518 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11519
11520 /* sub tmp, gstgrp, uSubtrahend */
11521 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
11522
11523      /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
11524 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11525
11526 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11527#endif
11528
11529 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11530
11531 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11532
11533 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11534 return off;
11535}
11536
11537
11538#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
11539 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11540
11541#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
11542 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11543
11544/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
11545DECL_INLINE_THROW(uint32_t)
11546iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
11547{
11548 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11549 kIemNativeGstRegUse_ForUpdate);
11550
11551#ifdef RT_ARCH_AMD64
11552 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11553 if (f64Bit)
11554 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11555 else if (idxGstTmpReg >= 8)
11556 pbCodeBuf[off++] = X86_OP_REX_B;
11557 if (uSubtrahend == 1)
11558 {
11559 pbCodeBuf[off++] = 0xff; /* dec */
11560 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11561 }
11562 else if (uSubtrahend < 128)
11563 {
11564 pbCodeBuf[off++] = 0x83; /* sub */
11565 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11566 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11567 }
11568 else
11569 {
11570 pbCodeBuf[off++] = 0x81; /* sub */
11571 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11572 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11573 pbCodeBuf[off++] = 0;
11574 pbCodeBuf[off++] = 0;
11575 pbCodeBuf[off++] = 0;
11576 }
11577
11578#else
11579      /* sub gstgrp, gstgrp, uSubtrahend */
11580 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11581 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
11582
11583#endif
11584
11585 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11586
11587 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11588
11589 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11590 return off;
11591}
11592
11593
11594/*********************************************************************************************************************************
11595* Local variable manipulation (add, sub, and, or). *
11596*********************************************************************************************************************************/
11597
11598#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
11599 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11600
11601#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
11602 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11603
11604#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
11605 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11606
11607#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
11608 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11609
11610/** Emits code for AND'ing a local and a constant value. */
11611DECL_INLINE_THROW(uint32_t)
11612iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11613{
11614#ifdef VBOX_STRICT
11615 switch (cbMask)
11616 {
11617 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11618 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11619 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11620 case sizeof(uint64_t): break;
11621 default: AssertFailedBreak();
11622 }
11623#endif
11624
11625 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11626 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11627
11628 if (cbMask <= sizeof(uint32_t))
11629 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
11630 else
11631 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
11632
11633 iemNativeVarRegisterRelease(pReNative, idxVar);
11634 return off;
11635}
11636
11637
11638#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
11639 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11640
11641#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
11642 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11643
11644#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
11645 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11646
11647#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
11648 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11649
11650/** Emits code for OR'ing a local and a constant value. */
11651DECL_INLINE_THROW(uint32_t)
11652iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11653{
11654#ifdef VBOX_STRICT
11655 switch (cbMask)
11656 {
11657 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11658 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11659 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11660 case sizeof(uint64_t): break;
11661 default: AssertFailedBreak();
11662 }
11663#endif
11664
11665 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11666 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11667
11668 if (cbMask <= sizeof(uint32_t))
11669 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
11670 else
11671 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
11672
11673 iemNativeVarRegisterRelease(pReNative, idxVar);
11674 return off;
11675}
11676
11677
11678#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
11679 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
11680
11681#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
11682 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
11683
11684#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
11685 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
11686
11687/** Emits code for reversing the byte order in a local value. */
11688DECL_INLINE_THROW(uint32_t)
11689iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
11690{
11691 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11692 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
11693
11694 switch (cbLocal)
11695 {
11696 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
11697 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
11698 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
11699 default: AssertFailedBreak();
11700 }
11701
11702 iemNativeVarRegisterRelease(pReNative, idxVar);
11703 return off;
11704}
11705
11706
11707
11708/*********************************************************************************************************************************
11709* EFLAGS *
11710*********************************************************************************************************************************/
11711
11712#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
11713# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
11714#else
11715# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
11716 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
11717
11718DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
11719{
11720 if (fEflOutput)
11721 {
11722 PVMCPUCC const pVCpu = pReNative->pVCpu;
11723# ifndef IEMLIVENESS_EXTENDED_LAYOUT
11724 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
11725 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
11726 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
11727# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11728 if (fEflOutput & (a_fEfl)) \
11729 { \
11730 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
11731 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11732 else \
11733 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11734 } else do { } while (0)
11735# else
11736 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
11737 IEMLIVENESSBIT const LivenessClobbered =
11738 {
11739 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11740 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11741 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11742 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11743 };
11744 IEMLIVENESSBIT const LivenessDelayable =
11745 {
11746 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11747 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11748 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11749 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11750 };
11751# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11752 if (fEflOutput & (a_fEfl)) \
11753 { \
11754 if (LivenessClobbered.a_fLivenessMember) \
11755 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11756 else if (LivenessDelayable.a_fLivenessMember) \
11757 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
11758 else \
11759 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11760 } else do { } while (0)
11761# endif
11762 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
11763 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
11764 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
11765 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
11766 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
11767 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
11768 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
11769# undef CHECK_FLAG_AND_UPDATE_STATS
11770 }
11771 RT_NOREF(fEflInput);
11772}
11773 #endif /* !VBOX_WITH_STATISTICS || !IEMNATIVE_WITH_LIVENESS_ANALYSIS */
11774
11775#undef IEM_MC_FETCH_EFLAGS /* should not be used */
11776#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11777 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
11778
11779/** Handles IEM_MC_FETCH_EFLAGS_EX. */
11780DECL_INLINE_THROW(uint32_t)
11781iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
11782 uint32_t fEflInput, uint32_t fEflOutput)
11783{
11784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
11785 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11786 RT_NOREF(fEflInput, fEflOutput);
11787
11788#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
11789# ifdef VBOX_STRICT
11790 if ( pReNative->idxCurCall != 0
11791 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
11792 {
11793 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
11794 uint32_t const fBoth = fEflInput | fEflOutput;
11795# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
11796 AssertMsg( !(fBoth & (a_fElfConst)) \
11797 || (!(fEflInput & (a_fElfConst)) \
11798 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11799 : !(fEflOutput & (a_fElfConst)) \
11800 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11801 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
11802 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
11803 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
11804 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
11805 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
11806 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
11807 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
11808 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
11809 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
11810# undef ASSERT_ONE_EFL
11811 }
11812# endif
11813#endif
11814
11815      /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
11816 * the existing shadow copy. */
11817 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
11818 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11819 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11820 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11821 return off;
11822}
11823
11824
11825
11826/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
11827 * start using it with custom native code emission (inlining assembly
11828 * instruction helpers). */
11829#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
11830#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11831 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11832 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
11833
11834/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
11835DECL_INLINE_THROW(uint32_t)
11836iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
11837{
11838 RT_NOREF(fEflOutput);
11839 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
11840 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11841
11842#ifdef VBOX_STRICT
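          /* Sanity check the committed value: the reserved-1 bit (bit 1) must be set and
             the reserved-zero bits must be clear, otherwise trip a breakpoint (0x2001/0x2002). */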
11843 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
11844 uint32_t offFixup = off;
11845 off = iemNativeEmitJnzToFixed(pReNative, off, off);
11846 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
11847 iemNativeFixupFixedJump(pReNative, offFixup, off);
11848
11849 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
11850 offFixup = off;
11851 off = iemNativeEmitJzToFixed(pReNative, off, off);
11852 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
11853 iemNativeFixupFixedJump(pReNative, offFixup, off);
11854
11855      /** @todo validate that only bits in the fEflOutput mask changed. */
11856#endif
11857
11858 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11859 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
11860 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11861 return off;
11862}
11863
11864
11865
11866/*********************************************************************************************************************************
11867* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
11868*********************************************************************************************************************************/
11869
11870#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
11871 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
11872
11873#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
11874 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
11875
11876#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
11877 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
11878
11879
11880/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
11881 * IEM_MC_FETCH_SREG_ZX_U64. */
11882DECL_INLINE_THROW(uint32_t)
11883iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
11884{
11885 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
11886 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
11887 Assert(iSReg < X86_SREG_COUNT);
11888
11889 /*
11890      * For now, we will not create a shadow copy of a selector.  The rationale
11891      * is that since we do not recompile the popping and loading of segment
11892      * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
11893      * pushing and moving to registers, there is only a small chance that the
11894      * shadow copy will be accessed again before the register is reloaded.  One
11895      * scenario would be nested calls in 16-bit code, but I doubt it's worth
11896      * the extra register pressure atm.
11897      *
11898      * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
11899      * and iemNativeVarRegisterAcquire for a load scenario.  We only have the
11900      * store scenario covered at present (r160730).
11901 */
11902 iemNativeVarSetKindToStack(pReNative, idxDstVar);
11903 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
11904 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
11905 iemNativeVarRegisterRelease(pReNative, idxDstVar);
11906 return off;
11907}
11908
11909
11910
11911/*********************************************************************************************************************************
11912* Register references. *
11913*********************************************************************************************************************************/
11914
11915#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
11916 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
11917
11918#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
11919 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
11920
11921/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
11922DECL_INLINE_THROW(uint32_t)
11923iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
11924{
11925 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
11926 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11927 Assert(iGRegEx < 20);
11928
11929 if (iGRegEx < 16)
11930 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11931 else
11932 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
11933
11934 /* If we've delayed writing back the register value, flush it now. */
11935 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11936
11937 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11938 if (!fConst)
11939 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
11940
11941 return off;
11942}
11943
11944#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
11945 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
11946
11947#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
11948 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
11949
11950#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
11951 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
11952
11953#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
11954 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
11955
11956#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
11957 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
11958
11959#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
11960 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
11961
11962#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
11963 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
11964
11965#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
11966 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
11967
11968#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
11969 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
11970
11971#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
11972 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
11973
11974/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
11975DECL_INLINE_THROW(uint32_t)
11976iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
11977{
11978 Assert(iGReg < 16);
11979 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
11980 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11981
11982 /* If we've delayed writing back the register value, flush it now. */
11983 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
11984
11985 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11986 if (!fConst)
11987 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
11988
11989 return off;
11990}
11991
11992
11993#undef IEM_MC_REF_EFLAGS /* should not be used. */
11994#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
11995 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11996 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
11997
11998/** Handles IEM_MC_REF_EFLAGS. */
11999DECL_INLINE_THROW(uint32_t)
12000iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
12001{
12002 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
12003 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12004
12005 /* If we've delayed writing back the register value, flush it now. */
12006 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
12007
12008 /* If there is a shadow copy of guest EFLAGS, flush it now. */
12009 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
12010
12011 return off;
12012}
12013
12014
12015/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
12016  * different code from the threaded recompiler, maybe it would be helpful.  For now
12017  * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
12018#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
12019
12020
12021#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
12022 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
12023
12024#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
12025 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
12026
12027#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
12028 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
12029
12030/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
12031DECL_INLINE_THROW(uint32_t)
12032iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
12033{
12034 Assert(iXReg < 16);
12035 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
12036 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12037
12038 /* If we've delayed writing back the register value, flush it now. */
12039 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
12040
12041#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
12042 /* If it's not a const reference we need to flush the shadow copy of the register now. */
12043 if (!fConst)
12044 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
12045#else
12046 RT_NOREF(fConst);
12047#endif
12048
12049 return off;
12050}
12051
12052
12053#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
12054 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
12055
12056/** Handles IEM_MC_REF_MXCSR. */
12057DECL_INLINE_THROW(uint32_t)
12058iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
12059{
12060 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
12061 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12062
12063 /* If we've delayed writing back the register value, flush it now. */
12064 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
12065
12066 /* If there is a shadow copy of guest MXCSR, flush it now. */
12067 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
12068
12069 return off;
12070}
12071
12072
12073
12074/*********************************************************************************************************************************
12075* Effective Address Calculation *
12076*********************************************************************************************************************************/
12077#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
12078 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
12079
12080/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
12081 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
12082DECL_INLINE_THROW(uint32_t)
12083iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12084 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
12085{
12086 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12087
12088 /*
12089 * Handle the disp16 form with no registers first.
12090 *
12091 * Convert to an immediate value, as that'll delay the register allocation
12092 * and assignment till the memory access / call / whatever and we can use
12093 * a more appropriate register (or none at all).
12094 */
12095 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
12096 {
12097 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
12098 return off;
12099 }
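/* Illustration (x86 16-bit addressing): mod=0 with r/m=6 is the bare [disp16]
   form, e.g. 8B 06 34 12 decodes as "mov ax, [1234h]", so the address is the
   immediate itself and no guest register is needed at all. */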
12100
12101 /* Determine the displacement. */
12102 uint16_t u16EffAddr;
12103 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12104 {
12105 case 0: u16EffAddr = 0; break;
12106 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
12107 case 2: u16EffAddr = u16Disp; break;
12108 default: AssertFailedStmt(u16EffAddr = 0);
12109 }
12110
12111 /* Determine the registers involved. */
12112 uint8_t idxGstRegBase;
12113 uint8_t idxGstRegIndex;
12114 switch (bRm & X86_MODRM_RM_MASK)
12115 {
12116 case 0:
12117 idxGstRegBase = X86_GREG_xBX;
12118 idxGstRegIndex = X86_GREG_xSI;
12119 break;
12120 case 1:
12121 idxGstRegBase = X86_GREG_xBX;
12122 idxGstRegIndex = X86_GREG_xDI;
12123 break;
12124 case 2:
12125 idxGstRegBase = X86_GREG_xBP;
12126 idxGstRegIndex = X86_GREG_xSI;
12127 break;
12128 case 3:
12129 idxGstRegBase = X86_GREG_xBP;
12130 idxGstRegIndex = X86_GREG_xDI;
12131 break;
12132 case 4:
12133 idxGstRegBase = X86_GREG_xSI;
12134 idxGstRegIndex = UINT8_MAX;
12135 break;
12136 case 5:
12137 idxGstRegBase = X86_GREG_xDI;
12138 idxGstRegIndex = UINT8_MAX;
12139 break;
12140 case 6:
12141 idxGstRegBase = X86_GREG_xBP;
12142 idxGstRegIndex = UINT8_MAX;
12143 break;
12144#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
12145 default:
12146#endif
12147 case 7:
12148 idxGstRegBase = X86_GREG_xBX;
12149 idxGstRegIndex = UINT8_MAX;
12150 break;
12151 }
12152
12153 /*
12154 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
12155 */
12156 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12157 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12158 kIemNativeGstRegUse_ReadOnly);
12159 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
12160 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12161 kIemNativeGstRegUse_ReadOnly)
12162 : UINT8_MAX;
12163#ifdef RT_ARCH_AMD64
12164 if (idxRegIndex == UINT8_MAX)
12165 {
12166 if (u16EffAddr == 0)
12167 {
12168 /* movzx ret, base */
12169 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
12170 }
12171 else
12172 {
12173 /* lea ret32, [base64 + disp32] */
12174 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12175 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12176 if (idxRegRet >= 8 || idxRegBase >= 8)
12177 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12178 pbCodeBuf[off++] = 0x8d;
12179 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12180 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
12181 else
12182 {
12183 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
12184 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12185 }
12186 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12187 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12188 pbCodeBuf[off++] = 0;
12189 pbCodeBuf[off++] = 0;
12190 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12191
12192 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12193 }
12194 }
12195 else
12196 {
12197 /* lea ret32, [index64 + base64 (+ disp32)] */
12198 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12199 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12200 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12201 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12202 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12203 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12204 pbCodeBuf[off++] = 0x8d;
12205 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
12206 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12207 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
12208 if (bMod == X86_MOD_MEM4)
12209 {
12210 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12211 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12212 pbCodeBuf[off++] = 0;
12213 pbCodeBuf[off++] = 0;
12214 }
12215 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12216 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12217 }
12218
12219#elif defined(RT_ARCH_ARM64)
12220 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
12221 if (u16EffAddr == 0)
12222 {
12223 if (idxRegIndex == UINT8_MAX)
12224 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
12225 else
12226 {
12227 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
12228 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12229 }
12230 }
12231 else
12232 {
12233 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
12234 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
12235 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
12236 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12237 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
12238 else
12239 {
12240 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
12241 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12242 }
12243 if (idxRegIndex != UINT8_MAX)
12244 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
12245 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12246 }
12247
12248#else
12249# error "port me"
12250#endif
12251
12252 if (idxRegIndex != UINT8_MAX)
12253 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12254 iemNativeRegFreeTmp(pReNative, idxRegBase);
12255 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12256 return off;
12257}
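/* Worked example of the 16-bit wrap the Clear16Up/Uxth instructions above take
   care of: with bx=0xfff0 and si=0x0020, [bx+si] addresses
   (0xfff0 + 0x0020) & 0xffff = 0x0010, i.e. the sum must be truncated to 16 bits
   rather than carried into a wider effective address. */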
12258
12259
12260#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
12261 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
12262
12263/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
12264 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
12265DECL_INLINE_THROW(uint32_t)
12266iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12267 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
12268{
12269 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12270
12271 /*
12272 * Handle the disp32 form with no registers first.
12273 *
12274 * Convert to an immediate value, as that'll delay the register allocation
12275 * and assignment till the memory access / call / whatever and we can use
12276 * a more appropriate register (or none at all).
12277 */
12278 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12279 {
12280 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
12281 return off;
12282 }
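/* Illustration (x86 32-bit addressing): mod=0 with r/m=5 is the bare [disp32]
   form, e.g. 8B 05 78 56 34 12 decodes as "mov eax, [12345678h]" in 32-bit code,
   so again the variable simply becomes a constant. */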
12283
12284 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
12285 uint32_t u32EffAddr = 0;
12286 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12287 {
12288 case 0: break;
12289 case 1: u32EffAddr = (int8_t)u32Disp; break;
12290 case 2: u32EffAddr = u32Disp; break;
12291 default: AssertFailed();
12292 }
12293
12294 /* Get the register (or SIB) value. */
12295 uint8_t idxGstRegBase = UINT8_MAX;
12296 uint8_t idxGstRegIndex = UINT8_MAX;
12297 uint8_t cShiftIndex = 0;
12298 switch (bRm & X86_MODRM_RM_MASK)
12299 {
12300 case 0: idxGstRegBase = X86_GREG_xAX; break;
12301 case 1: idxGstRegBase = X86_GREG_xCX; break;
12302 case 2: idxGstRegBase = X86_GREG_xDX; break;
12303 case 3: idxGstRegBase = X86_GREG_xBX; break;
12304 case 4: /* SIB */
12305 {
12306 /* index w/ scaling. */
12307 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12308 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12309 {
12310 case 0: idxGstRegIndex = X86_GREG_xAX; break;
12311 case 1: idxGstRegIndex = X86_GREG_xCX; break;
12312 case 2: idxGstRegIndex = X86_GREG_xDX; break;
12313 case 3: idxGstRegIndex = X86_GREG_xBX; break;
12314 case 4: cShiftIndex = 0; /*no index*/ break;
12315 case 5: idxGstRegIndex = X86_GREG_xBP; break;
12316 case 6: idxGstRegIndex = X86_GREG_xSI; break;
12317 case 7: idxGstRegIndex = X86_GREG_xDI; break;
12318 }
12319
12320 /* base */
12321 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
12322 {
12323 case 0: idxGstRegBase = X86_GREG_xAX; break;
12324 case 1: idxGstRegBase = X86_GREG_xCX; break;
12325 case 2: idxGstRegBase = X86_GREG_xDX; break;
12326 case 3: idxGstRegBase = X86_GREG_xBX; break;
12327 case 4:
12328 idxGstRegBase = X86_GREG_xSP;
12329 u32EffAddr += uSibAndRspOffset >> 8;
12330 break;
12331 case 5:
12332 if ((bRm & X86_MODRM_MOD_MASK) != 0)
12333 idxGstRegBase = X86_GREG_xBP;
12334 else
12335 {
12336 Assert(u32EffAddr == 0);
12337 u32EffAddr = u32Disp;
12338 }
12339 break;
12340 case 6: idxGstRegBase = X86_GREG_xSI; break;
12341 case 7: idxGstRegBase = X86_GREG_xDI; break;
12342 }
12343 break;
12344 }
12345 case 5: idxGstRegBase = X86_GREG_xBP; break;
12346 case 6: idxGstRegBase = X86_GREG_xSI; break;
12347 case 7: idxGstRegBase = X86_GREG_xDI; break;
12348 }
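/* Roughly, the SIB decoding above implements EffAddr = base + (index << scale) + disp,
   where index=4 means "no index" and base=5 with mod=0 means "no base, use disp32";
   the xSP base case additionally folds in the fixed ESP offset carried in
   uSibAndRspOffset for the pop [esp] style instructions. */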
12349
12350 /*
12351 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12352 * the start of the function.
12353 */
12354 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12355 {
12356 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
12357 return off;
12358 }
12359
12360 /*
12361 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12362 */
12363 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12364 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12365 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12366 kIemNativeGstRegUse_ReadOnly);
12367 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12368 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12369 kIemNativeGstRegUse_ReadOnly);
12370
12371 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12372 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12373 {
12374 idxRegBase = idxRegIndex;
12375 idxRegIndex = UINT8_MAX;
12376 }
12377
12378#ifdef RT_ARCH_AMD64
12379 if (idxRegIndex == UINT8_MAX)
12380 {
12381 if (u32EffAddr == 0)
12382 {
12383 /* mov ret, base */
12384 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12385 }
12386 else
12387 {
12388 /* lea ret32, [base64 + disp32] */
12389 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12390 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12391 if (idxRegRet >= 8 || idxRegBase >= 8)
12392 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12393 pbCodeBuf[off++] = 0x8d;
12394 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12395 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12396 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12397 else
12398 {
12399 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12400 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12401 }
12402 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12403 if (bMod == X86_MOD_MEM4)
12404 {
12405 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12406 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12407 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12408 }
12409 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12410 }
12411 }
12412 else
12413 {
12414 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12415 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12416 if (idxRegBase == UINT8_MAX)
12417 {
12418 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
12419 if (idxRegRet >= 8 || idxRegIndex >= 8)
12420 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12421 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12422 pbCodeBuf[off++] = 0x8d;
12423 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12424 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12425 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12426 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12427 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12428 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12429 }
12430 else
12431 {
12432 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12433 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12434 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12435 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12436 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12437 pbCodeBuf[off++] = 0x8d;
12438 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12439 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12440 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12441 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12442 if (bMod != X86_MOD_MEM0)
12443 {
12444 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12445 if (bMod == X86_MOD_MEM4)
12446 {
12447 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12448 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12449 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12450 }
12451 }
12452 }
12453 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12454 }
12455
12456#elif defined(RT_ARCH_ARM64)
12457 if (u32EffAddr == 0)
12458 {
12459 if (idxRegIndex == UINT8_MAX)
12460 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12461 else if (idxRegBase == UINT8_MAX)
12462 {
12463 if (cShiftIndex == 0)
12464 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
12465 else
12466 {
12467 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12468 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
12469 }
12470 }
12471 else
12472 {
12473 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12474 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12475 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12476 }
12477 }
12478 else
12479 {
12480 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
12481 {
12482 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12483 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
12484 }
12485 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
12486 {
12487 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12488 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12489 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
12490 }
12491 else
12492 {
12493 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
12494 if (idxRegBase != UINT8_MAX)
12495 {
12496 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12497 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12498 }
12499 }
12500 if (idxRegIndex != UINT8_MAX)
12501 {
12502 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12503 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12504 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12505 }
12506 }
12507
12508#else
12509# error "port me"
12510#endif
12511
12512 if (idxRegIndex != UINT8_MAX)
12513 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12514 if (idxRegBase != UINT8_MAX)
12515 iemNativeRegFreeTmp(pReNative, idxRegBase);
12516 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12517 return off;
12518}
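/* Worked example (sketch): with ebx=0xfffffff0, esi=0x8, scale=1 (cShiftIndex=1)
   and disp=0x10 the calculation is 0xfffffff0 + (0x8 << 1) + 0x10 = 0x100000010,
   which truncated to 32 bits gives 0x00000010 - hence the 32-bit lea/mov forms on
   AMD64 and the f64Bit=false adds on ARM64 above. */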
12519
12520
12521#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12522 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12523 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12524
12525#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12526 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12527 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12528
12529#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12530 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12531 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
12532
12533/**
12534 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
12535 *
12536 * @returns New off.
12537 * @param pReNative The native recompile state.
12538 * @param off The code buffer offset.
12539 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
12540 * bit 4 to REX.X. The two bits are part of the
12541 * REG sub-field, which isn't needed in this
12542 * function.
12543 * @param uSibAndRspOffset Two parts:
12544 * - The first 8 bits make up the SIB byte.
12545 * - The next 8 bits are the fixed RSP/ESP offset
12546 * in case of a pop [xSP].
12547 * @param u32Disp The displacement byte/word/dword, if any.
12548 * @param cbInstr The size of the fully decoded instruction. Used
12549 * for RIP relative addressing.
12550 * @param idxVarRet The result variable number.
12551 * @param f64Bit Whether to use a 64-bit or 32-bit address size
12552 * when calculating the address.
12553 *
12554 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
12555 */
12556DECL_INLINE_THROW(uint32_t)
12557iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
12558 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
12559{
12560 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12561
12562 /*
12563 * Special case the rip + disp32 form first.
12564 */
12565 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12566 {
12567#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12568 /* Need to take the current PC offset into account for the displacement; no flush is needed
12569 * here as the PC is only read and there are no branches or helper calls involved. */
12570 u32Disp += pReNative->Core.offPc;
12571#endif
12572
12573 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12574 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
12575 kIemNativeGstRegUse_ReadOnly);
12576#ifdef RT_ARCH_AMD64
12577 if (f64Bit)
12578 {
12579 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
12580 if ((int32_t)offFinalDisp == offFinalDisp)
12581 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
12582 else
12583 {
12584 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
12585 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
12586 }
12587 }
12588 else
12589 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
12590
12591#elif defined(RT_ARCH_ARM64)
12592 if (f64Bit)
12593 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12594 (int64_t)(int32_t)u32Disp + cbInstr);
12595 else
12596 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12597 (int32_t)u32Disp + cbInstr);
12598
12599#else
12600# error "Port me!"
12601#endif
12602 iemNativeRegFreeTmp(pReNative, idxRegPc);
12603 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12604 return off;
12605 }
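/* The "+ cbInstr" above is what makes this RIP relative: the displacement is
   defined relative to the end of the instruction, e.g. a 7 byte
   "lea rax, [rip+100h]" at 0x1000 references 0x1000 + 7 + 0x100 = 0x1107. */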
12606
12607 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
12608 int64_t i64EffAddr = 0;
12609 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12610 {
12611 case 0: break;
12612 case 1: i64EffAddr = (int8_t)u32Disp; break;
12613 case 2: i64EffAddr = (int32_t)u32Disp; break;
12614 default: AssertFailed();
12615 }
12616
12617 /* Get the register (or SIB) value. */
12618 uint8_t idxGstRegBase = UINT8_MAX;
12619 uint8_t idxGstRegIndex = UINT8_MAX;
12620 uint8_t cShiftIndex = 0;
12621 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
12622 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
12623 else /* SIB: */
12624 {
12625 /* index w/ scaling. */
12626 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12627 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12628 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
12629 if (idxGstRegIndex == 4)
12630 {
12631 /* no index */
12632 cShiftIndex = 0;
12633 idxGstRegIndex = UINT8_MAX;
12634 }
12635
12636 /* base */
12637 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
12638 if (idxGstRegBase == 4)
12639 {
12640 /* pop [rsp] hack */
12641 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
12642 }
12643 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
12644 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
12645 {
12646 /* mod=0 and base=5 -> disp32, no base reg. */
12647 Assert(i64EffAddr == 0);
12648 i64EffAddr = (int32_t)u32Disp;
12649 idxGstRegBase = UINT8_MAX;
12650 }
12651 }
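/* i64EffAddr needs to be 64-bit here because the pop [rsp] hack can push the sum
   out of int32_t range, e.g. disp32=0x7fffffff plus an RSP offset of 8 gives
   0x80000007; that overflow case is what the bFinalAdj splitting in the AMD64
   code further down deals with. */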
12652
12653 /*
12654 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12655 * the start of the function.
12656 */
12657 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12658 {
12659 if (f64Bit)
12660 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
12661 else
12662 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
12663 return off;
12664 }
12665
12666 /*
12667 * Now emit code that calculates:
12668 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12669 * or if !f64Bit:
12670 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12671 */
12672 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12673 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12674 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12675 kIemNativeGstRegUse_ReadOnly);
12676 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12677 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12678 kIemNativeGstRegUse_ReadOnly);
12679
12680 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12681 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12682 {
12683 idxRegBase = idxRegIndex;
12684 idxRegIndex = UINT8_MAX;
12685 }
12686
12687#ifdef RT_ARCH_AMD64
12688 uint8_t bFinalAdj;
12689 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
12690 bFinalAdj = 0; /* likely */
12691 else
12692 {
12693 /* pop [rsp] with a problematic disp32 value. Split out the
12694 RSP offset and add it separately afterwards (bFinalAdj). */
12695 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
12696 Assert(idxGstRegBase == X86_GREG_xSP);
12697 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
12698 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
12699 Assert(bFinalAdj != 0);
12700 i64EffAddr -= bFinalAdj;
12701 Assert((int32_t)i64EffAddr == i64EffAddr);
12702 }
12703 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
12704//pReNative->pInstrBuf[off++] = 0xcc;
12705
12706 if (idxRegIndex == UINT8_MAX)
12707 {
12708 if (u32EffAddr == 0)
12709 {
12710 /* mov ret, base */
12711 if (f64Bit)
12712 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
12713 else
12714 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12715 }
12716 else
12717 {
12718 /* lea ret, [base + disp32] */
12719 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12720 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12721 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
12722 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12723 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12724 | (f64Bit ? X86_OP_REX_W : 0);
12725 pbCodeBuf[off++] = 0x8d;
12726 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12727 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12728 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12729 else
12730 {
12731 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12732 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12733 }
12734 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12735 if (bMod == X86_MOD_MEM4)
12736 {
12737 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12738 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12739 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12740 }
12741 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12742 }
12743 }
12744 else
12745 {
12746 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12747 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12748 if (idxRegBase == UINT8_MAX)
12749 {
12750 /* lea ret, [(index64 << cShiftIndex) + disp32] */
12751 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
12752 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12753 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12754 | (f64Bit ? X86_OP_REX_W : 0);
12755 pbCodeBuf[off++] = 0x8d;
12756 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12757 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12758 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12759 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12760 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12761 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12762 }
12763 else
12764 {
12765 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12766 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12767 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12768 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12769 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12770 | (f64Bit ? X86_OP_REX_W : 0);
12771 pbCodeBuf[off++] = 0x8d;
12772 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12773 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12774 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12775 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12776 if (bMod != X86_MOD_MEM0)
12777 {
12778 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12779 if (bMod == X86_MOD_MEM4)
12780 {
12781 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12782 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12783 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12784 }
12785 }
12786 }
12787 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12788 }
12789
12790 if (!bFinalAdj)
12791 { /* likely */ }
12792 else
12793 {
12794 Assert(f64Bit);
12795 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
12796 }
12797
12798#elif defined(RT_ARCH_ARM64)
12799 if (i64EffAddr == 0)
12800 {
12801 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12802 if (idxRegIndex == UINT8_MAX)
12803 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
12804 else if (idxRegBase != UINT8_MAX)
12805 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12806 f64Bit, false /*fSetFlags*/, cShiftIndex);
12807 else
12808 {
12809 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
12810 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
12811 }
12812 }
12813 else
12814 {
12815 if (f64Bit)
12816 { /* likely */ }
12817 else
12818 i64EffAddr = (int32_t)i64EffAddr;
12819
12820 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
12821 {
12822 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12823 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
12824 }
12825 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
12826 {
12827 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12828 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
12829 }
12830 else
12831 {
12832 if (f64Bit)
12833 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
12834 else
12835 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
12836 if (idxRegBase != UINT8_MAX)
12837 {
12838 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12839 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
12840 }
12841 }
12842 if (idxRegIndex != UINT8_MAX)
12843 {
12844 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12845 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12846 f64Bit, false /*fSetFlags*/, cShiftIndex);
12847 }
12848 }
12849
12850#else
12851# error "port me"
12852#endif
12853
12854 if (idxRegIndex != UINT8_MAX)
12855 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12856 if (idxRegBase != UINT8_MAX)
12857 iemNativeRegFreeTmp(pReNative, idxRegBase);
12858 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12859 return off;
12860}
12861
12862
12863/*********************************************************************************************************************************
12864* TLB Lookup. *
12865*********************************************************************************************************************************/
12866
12867/**
12868 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
12869 */
12870DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
12871{
12872 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
12873 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
12874 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
12875 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
12876
12877 /* Do the lookup manually. */
12878 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
12879 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
12880 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
12881 if (RT_LIKELY(pTlbe->uTag == uTag))
12882 {
12883 /*
12884 * Check TLB page table level access flags.
12885 */
12886 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
12887 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
12888 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
12889 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
12890 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
12891 | IEMTLBE_F_PG_UNASSIGNED
12892 | IEMTLBE_F_PT_NO_ACCESSED
12893 | fNoWriteNoDirty | fNoUser);
12894 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
12895 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
12896 {
12897 /*
12898 * Return the address.
12899 */
12900 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
12901 if ((uintptr_t)pbAddr == uResult)
12902 return;
12903 RT_NOREF(cbMem);
12904 AssertFailed();
12905 }
12906 else
12907 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
12908 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
12909 }
12910 else
12911 AssertFailed();
12912 RT_BREAKPOINT();
12913}
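/* For reference, uSegAndSizeAndAccess is expected to be packed the way it is
   unpacked above, i.e. roughly:
       uSegAndSizeAndAccess = (uint32_t)iSegReg | ((uint32_t)cbMem << 8) | (fAccess << 16);
   with iSegReg = UINT8_MAX signalling that GCPtr is already flat. */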
12914
12915/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
12916
12917
12918/*********************************************************************************************************************************
12919* Memory fetches and stores common *
12920*********************************************************************************************************************************/
12921
12922typedef enum IEMNATIVEMITMEMOP
12923{
12924 kIemNativeEmitMemOp_Store = 0,
12925 kIemNativeEmitMemOp_Fetch,
12926 kIemNativeEmitMemOp_Fetch_Zx_U16,
12927 kIemNativeEmitMemOp_Fetch_Zx_U32,
12928 kIemNativeEmitMemOp_Fetch_Zx_U64,
12929 kIemNativeEmitMemOp_Fetch_Sx_U16,
12930 kIemNativeEmitMemOp_Fetch_Sx_U32,
12931 kIemNativeEmitMemOp_Fetch_Sx_U64
12932} IEMNATIVEMITMEMOP;
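/* The _Zx_/_Sx_ fetch variants only differ in how the loaded value is widened into
   the destination: fetching the byte 0x80, for instance, yields 0x00000080 with
   Fetch_Zx_U32 but 0xffffff80 with Fetch_Sx_U32. */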
12933
12934/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
12935 * as well as IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
12936 * (the flat variants pass iSegReg = UINT8_MAX). */
12937DECL_INLINE_THROW(uint32_t)
12938iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
12939 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
12940 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
12941{
12942 /*
12943 * Assert sanity.
12944 */
12945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12946 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12947 Assert( enmOp != kIemNativeEmitMemOp_Store
12948 || pVarValue->enmKind == kIemNativeVarKind_Immediate
12949 || pVarValue->enmKind == kIemNativeVarKind_Stack);
12950 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12951 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
12952 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
12953 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
12954 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12955 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12956 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
12957 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12958#ifdef VBOX_STRICT
12959 if (iSegReg == UINT8_MAX)
12960 {
12961 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12962 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12963 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12964 switch (cbMem)
12965 {
12966 case 1:
12967 Assert( pfnFunction
12968 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
12969 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12970 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12971 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12972 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12973 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
12974 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
12975 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
12976 : UINT64_C(0xc000b000a0009000) ));
12977 break;
12978 case 2:
12979 Assert( pfnFunction
12980 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
12981 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12982 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12983 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12984 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
12985 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
12986 : UINT64_C(0xc000b000a0009000) ));
12987 break;
12988 case 4:
12989 Assert( pfnFunction
12990 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
12991 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
12992 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
12993 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
12994 : UINT64_C(0xc000b000a0009000) ));
12995 break;
12996 case 8:
12997 Assert( pfnFunction
12998 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
12999 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
13000 : UINT64_C(0xc000b000a0009000) ));
13001 break;
13002 }
13003 }
13004 else
13005 {
13006 Assert(iSegReg < 6);
13007 switch (cbMem)
13008 {
13009 case 1:
13010 Assert( pfnFunction
13011 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
13012 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
13013 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13014 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13015 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13016 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
13017 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
13018 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
13019 : UINT64_C(0xc000b000a0009000) ));
13020 break;
13021 case 2:
13022 Assert( pfnFunction
13023 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
13024 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
13025 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
13026 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
13027 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
13028 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
13029 : UINT64_C(0xc000b000a0009000) ));
13030 break;
13031 case 4:
13032 Assert( pfnFunction
13033 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
13034 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
13035 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
13036 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
13037 : UINT64_C(0xc000b000a0009000) ));
13038 break;
13039 case 8:
13040 Assert( pfnFunction
13041 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
13042 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
13043 : UINT64_C(0xc000b000a0009000) ));
13044 break;
13045 }
13046 }
13047#endif
13048
13049#ifdef VBOX_STRICT
13050 /*
13051 * Check that the fExec flags we've got make sense.
13052 */
13053 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13054#endif
13055
13056 /*
13057 * To keep things simple we have to commit any pending writes first as we
13058 * may end up making calls.
13059 */
13060 /** @todo we could postpone this till we make the call and reload the
13061 * registers after returning from the call. Not sure if that's sensible or
13062 * not, though. */
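/* (The helpers called from the TlbMiss path below work on the guest context in
   memory and can presumably longjmp out on exceptions, so any guest value still
   living only in a host register shadow has to be written back first.) */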
13063#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13064 off = iemNativeRegFlushPendingWrites(pReNative, off);
13065#else
13066 /* The program counter is treated differently for now. */
13067 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
13068#endif
13069
13070#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13071 /*
13072 * Move/spill/flush stuff out of call-volatile registers.
13073 * This is the easy way out. We could contain this to the tlb-miss branch
13074 * by saving and restoring active stuff here.
13075 */
13076 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13077#endif
13078
13079 /*
13080 * Define labels and allocate the result register (trying for the return
13081 * register if we can).
13082 */
13083 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13084 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
13085 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13086 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
13087 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
13088 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
13089 uint8_t const idxRegValueStore = !TlbState.fSkip
13090 && enmOp == kIemNativeEmitMemOp_Store
13091 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13092 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
13093 : UINT8_MAX;
13094 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13095 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13096 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13097 : UINT32_MAX;
13098
13099 /*
13100 * Jump to the TLB lookup code.
13101 */
13102 if (!TlbState.fSkip)
13103 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
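/* Rough layout of what gets emitted below when the lookup isn't skipped:
       jmp TlbLookup
   TlbMiss: <save volatiles, load args, call pfnFunction, move result, restore>
       jmp TlbDone
   TlbLookup: <inline TLB probe, branching back to TlbMiss on failure>
       <actual load/store via idxRegMemResult>
   TlbDone:
   i.e. the miss code comes first in the buffer but is only reached from the
   failure branch inside the lookup code. */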
13104
13105 /*
13106 * TlbMiss:
13107 *
13108 * Call helper to do the fetching.
13109 * We flush all guest register shadow copies here.
13110 */
13111 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13112
13113#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13114 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13115#else
13116 RT_NOREF(idxInstr);
13117#endif
13118
13119#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13120 if (pReNative->Core.offPc)
13121 {
13122 /*
13123 * Update the program counter but restore it at the end of the TlbMiss branch.
13124 * This should allow delaying more program counter updates for the TlbLookup and hit paths,
13125 * which are hopefully much more frequent, reducing the number of memory accesses.
13126 */
13127 /* Allocate a temporary PC register. */
13128 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13129
13130 /* Perform the addition and store the result. */
13131 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13132 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13133
13134 /* Free and flush the PC register. */
13135 iemNativeRegFreeTmp(pReNative, idxPcReg);
13136 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13137 }
13138#endif
13139
13140#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13141 /* Save variables in volatile registers. */
13142 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13143 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
13144 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
13145 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13146#endif
13147
13148 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
13149 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
13150 if (enmOp == kIemNativeEmitMemOp_Store)
13151 {
13152 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
13153 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
13154#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13155 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13156#else
13157 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13158 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
13159#endif
13160 }
13161
13162 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
13163 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
13164#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13165 fVolGregMask);
13166#else
13167 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
13168#endif
13169
13170 if (iSegReg != UINT8_MAX)
13171 {
13172 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
13173 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13174 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
13175 }
13176
13177 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13178 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13179
13180 /* Done setting up parameters, make the call. */
13181 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13182
13183 /*
13184 * Put the result in the right register if this is a fetch.
13185 */
13186 if (enmOp != kIemNativeEmitMemOp_Store)
13187 {
13188 Assert(idxRegValueFetch == pVarValue->idxReg);
13189 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
13190 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
13191 }
13192
13193#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13194 /* Restore variables and guest shadow registers to volatile registers. */
13195 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13196 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13197#endif
13198
13199#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13200 if (pReNative->Core.offPc)
13201 {
13202 /*
13203 * Time to restore the program counter to its original value.
13204 */
13205 /* Allocate a temporary PC register. */
13206 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13207
13208 /* Restore the original value. */
13209 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13210 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13211
13212 /* Free and flush the PC register. */
13213 iemNativeRegFreeTmp(pReNative, idxPcReg);
13214 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13215 }
13216#endif
13217
13218#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13219 if (!TlbState.fSkip)
13220 {
13221 /* end of TlbMiss - Jump to the done label. */
13222 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13223 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13224
13225 /*
13226 * TlbLookup:
13227 */
13228 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
13229 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
13230 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
13231
13232 /*
13233 * Emit code to do the actual storing / fetching.
13234 */
13235 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13236# ifdef VBOX_WITH_STATISTICS
13237 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13238 enmOp == kIemNativeEmitMemOp_Store
13239 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
13240 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
13241# endif
13242 switch (enmOp)
13243 {
13244 case kIemNativeEmitMemOp_Store:
13245 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
13246 {
13247 switch (cbMem)
13248 {
13249 case 1:
13250 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13251 break;
13252 case 2:
13253 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13254 break;
13255 case 4:
13256 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13257 break;
13258 case 8:
13259 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13260 break;
13261 default:
13262 AssertFailed();
13263 }
13264 }
13265 else
13266 {
13267 switch (cbMem)
13268 {
13269 case 1:
13270 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
13271 idxRegMemResult, TlbState.idxReg1);
13272 break;
13273 case 2:
13274 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
13275 idxRegMemResult, TlbState.idxReg1);
13276 break;
13277 case 4:
13278 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
13279 idxRegMemResult, TlbState.idxReg1);
13280 break;
13281 case 8:
13282 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
13283 idxRegMemResult, TlbState.idxReg1);
13284 break;
13285 default:
13286 AssertFailed();
13287 }
13288 }
13289 break;
13290
13291 case kIemNativeEmitMemOp_Fetch:
13292 case kIemNativeEmitMemOp_Fetch_Zx_U16:
13293 case kIemNativeEmitMemOp_Fetch_Zx_U32:
13294 case kIemNativeEmitMemOp_Fetch_Zx_U64:
13295 switch (cbMem)
13296 {
13297 case 1:
13298 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13299 break;
13300 case 2:
13301 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13302 break;
13303 case 4:
13304 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13305 break;
13306 case 8:
13307 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13308 break;
13309 default:
13310 AssertFailed();
13311 }
13312 break;
13313
13314 case kIemNativeEmitMemOp_Fetch_Sx_U16:
13315 Assert(cbMem == 1);
13316 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13317 break;
13318
13319 case kIemNativeEmitMemOp_Fetch_Sx_U32:
13320 Assert(cbMem == 1 || cbMem == 2);
13321 if (cbMem == 1)
13322 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13323 else
13324 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13325 break;
13326
13327 case kIemNativeEmitMemOp_Fetch_Sx_U64:
13328 switch (cbMem)
13329 {
13330 case 1:
13331 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13332 break;
13333 case 2:
13334 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13335 break;
13336 case 4:
13337 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13338 break;
13339 default:
13340 AssertFailed();
13341 }
13342 break;
13343
13344 default:
13345 AssertFailed();
13346 }
13347
13348 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13349
13350 /*
13351 * TlbDone:
13352 */
13353 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13354
13355 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13356
13357# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13358 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13359 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13360# endif
13361 }
13362#else
13363 RT_NOREF(fAlignMask, idxLabelTlbMiss);
13364#endif
13365
13366 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
13367 iemNativeVarRegisterRelease(pReNative, idxVarValue);
13368 return off;
13369}
13370
13371
13372
13373/*********************************************************************************************************************************
13374* Memory fetches (IEM_MEM_FETCH_XXX). *
13375*********************************************************************************************************************************/
13376
13377/* 8-bit segmented: */
13378#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
13379 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
13380 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13381 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13382
13383#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13384 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13385 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13386 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13387
13388#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13389 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13390 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13391 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13392
13393#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13394 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13395 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13396 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13397
13398#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13399 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13400 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13401 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13402
13403#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13404 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13405 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13406 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13407
13408#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13409 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13410 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13411 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13412
13413/* 16-bit segmented: */
13414#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13415 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13416 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13417 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13418
13419#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13420 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13421 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13422 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13423
13424#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13425 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13426 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13427 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13428
13429#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13430 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13431 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13432 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13433
13434#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13435 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13436 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13437 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13438
13439#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13440 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13441 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13442 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13443
13444
13445/* 32-bit segmented: */
13446#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13447 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13448 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13449 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13450
13451#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13452 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13453 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13454 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13455
13456#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13457 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13458 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13459 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13460
13461#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13462 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13463 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13464 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13465
13466
13467/* 64-bit segmented: */
13468#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13469 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13470 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13471 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
13472
13473
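/*
 * Note, illustrative sketch: each IEM_MC_FETCH_MEM_* wrapper above simply
 * forwards to iemNativeEmitMemFetchStoreDataCommon() with the element size,
 * the alignment mask (size - 1, or 0 for byte accesses), the fetch/extend
 * operation and the helper to call on the TlbMiss path.  A hypothetical MC
 * block (not taken from this file) would use one of them like this:
 *
 *     IEM_MC_LOCAL(uint16_t, u16Value);
 *     IEM_MC_LOCAL(RTGCPTR,  GCPtrEffSrc);
 *     ... calculate GCPtrEffSrc ...
 *     IEM_MC_FETCH_MEM_U16(u16Value, X86_SREG_DS, GCPtrEffSrc);
 *
 * and the recompiler expands that into one call which emits the inline TLB
 * lookup and load, plus the iemNativeHlpMemFetchDataU16 call for misses.
 */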
13474
13475/* 8-bit flat: */
13476#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
13477 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
13478 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13479 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13480
13481#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
13482 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13483 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13484 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13485
13486#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
13487 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13488 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13489 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13490
13491#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
13492 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13493 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13494 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13495
13496#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
13497 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13498 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13499 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13500
13501#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
13502 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13503 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13504 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13505
13506#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
13507 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13508 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13509 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13510
13511
13512/* 16-bit flat: */
13513#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
13514 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13515 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13516 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13517
13518#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
13519 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13520 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13521 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13522
13523#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
13524 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13525 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13526 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13527
13528#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
13529 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13530 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13531 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13532
13533#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
13534 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13535 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13536 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13537
13538#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
13539 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13540 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13541 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13542
13543/* 32-bit flat: */
13544#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
13545 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13546 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13547 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13548
13549#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
13550 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13551 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13552 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13553
13554#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
13555 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13556 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13557 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13558
13559#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
13560 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13561 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13562 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13563
13564/* 64-bit flat: */
13565#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
13566 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13567 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13568 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
13569
13570
13571
13572/*********************************************************************************************************************************
13573* Memory stores (IEM_MEM_STORE_XXX). *
13574*********************************************************************************************************************************/
13575
13576#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
13577 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
13578 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13579 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13580
13581#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
13582 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
13583 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13584 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13585
13586#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
13587 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
13588 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13589 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13590
13591#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
13592 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
13593 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13594 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13595
13596
13597#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
13598 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
13599 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13600 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13601
13602#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
13603 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
13604 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13605 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13606
13607#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
13608 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
13609 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13610 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13611
13612#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
13613 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
13614 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13615 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13616
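/*
 * Note, illustrative sketch: the store wrappers above reuse the same
 * iemNativeEmitMemFetchStoreDataCommon() emitter, only with
 * kIemNativeEmitMemOp_Store and the corresponding store helpers for the
 * TlbMiss path.  Hypothetical MC usage (not taken from this file):
 *
 *     IEM_MC_LOCAL(uint32_t, u32Value);
 *     ... compute u32Value and GCPtrEffDst ...
 *     IEM_MC_STORE_MEM_U32(X86_SREG_ES, GCPtrEffDst, u32Value);
 */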
13617
13618#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
13619 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13620 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13621
13622#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
13623 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13624 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13625
13626#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
13627 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13628 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13629
13630#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
13631 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13632 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13633
13634
13635#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
13636 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13637 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13638
13639#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
13640 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13641 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13642
13643#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
13644 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13645 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13646
13647#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
13648 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13649 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13650
13651/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
13652 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
13653DECL_INLINE_THROW(uint32_t)
13654iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
13655 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
13656{
13657 /*
13658 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
13659 * to do the grunt work.
13660 */
13661 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
13662 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
13663 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
13664 pfnFunction, idxInstr);
13665 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
13666 return off;
13667}
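/*
 * Note, illustrative sketch: for a byte store the const variant above thus
 * boils down to roughly the following (hypothetical value 0xff, simplified):
 *
 *     uint8_t const idxVarTmp = iemNativeVarAllocConst(pReNative, sizeof(uint8_t), 0xff);
 *     off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarTmp, iSegReg, idxVarGCPtrMem,
 *                                                sizeof(uint8_t), 0, kIemNativeEmitMemOp_Store,
 *                                                (uintptr_t)iemNativeHlpMemStoreDataU8, idxInstr);
 *     iemNativeVarFreeLocal(pReNative, idxVarTmp);
 *
 * Keeping the value as an immediate variable allows the TLB-hit path to use a
 * store-immediate instruction instead of first loading the constant into a
 * register (compare the immediate handling in iemNativeEmitStackPush below).
 */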
13668
13669
13670
13671/*********************************************************************************************************************************
13672* Stack Accesses. *
13673*********************************************************************************************************************************/
13674/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
13675#define IEM_MC_PUSH_U16(a_u16Value) \
13676 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
13677 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
13678#define IEM_MC_PUSH_U32(a_u32Value) \
13679 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
13680 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
13681#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
13682 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
13683 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
13684#define IEM_MC_PUSH_U64(a_u64Value) \
13685 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
13686 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
13687
13688#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
13689 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
13690 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13691#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
13692 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
13693 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
13694#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
13695 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
13696 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
13697
13698#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
13699 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
13700 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13701#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
13702 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
13703 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
13704
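/*
 * Note, illustrative sketch: the RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0)
 * parameter packs three facts into one uint32_t so a single emitter can serve
 * all the push variants; the emitter unpacks it with RT_BYTE1/2/3.  For example,
 * for IEM_MC_FLAT32_PUSH_U32_SREG:
 *
 *     uint32_t const cBitsVarAndFlat = RT_MAKE_U32_FROM_U8(32, 32, 1, 0);
 *     uint8_t  const cbMem     = RT_BYTE1(cBitsVarAndFlat) / 8;   // 4 byte operand
 *     uint8_t  const cBitsFlat = RT_BYTE2(cBitsVarAndFlat);       // 32-bit flat stack
 *     bool     const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;  // true, segment register push
 */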
13705
13706DECL_FORCE_INLINE_THROW(uint32_t)
13707iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13708{
13709 /* Use16BitSp: */
13710#ifdef RT_ARCH_AMD64
13711 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
13712 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13713#else
13714 /* sub regeff, regrsp, #cbMem */
13715 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
13716 /* and regeff, regeff, #0xffff */
13717 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
13718 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
13719 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0, keeping the other RSP bits as is. */
13720 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
13721#endif
13722 return off;
13723}
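/*
 * Note, worked example for the 16-bit SP case above: only SP (RSP bits 15:0)
 * is decremented, it wraps at zero, and RSP bits 63:16 must stay untouched.
 * Assuming RSP=0x00010002 and a 4 byte push:
 *
 *     regeff = (0x0002 - 4) & 0xffff = 0xFFFE       // effective SS offset to store at
 *     regrsp = 0x00010000 | 0xFFFE   = 0x0001FFFE   // bits 15:0 replaced, the rest preserved
 *
 * The AMD64 path gets the same effect from a 16-bit SUB (assumed to leave
 * bits 63:16 alone) followed by a 16-bit register copy into the effective-SP
 * register.
 */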
13724
13725
13726DECL_FORCE_INLINE(uint32_t)
13727iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13728{
13729 /* Use32BitSp: */
13730 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
13731 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13732 return off;
13733}
13734
13735
13736/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
13737DECL_INLINE_THROW(uint32_t)
13738iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
13739 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
13740{
13741 /*
13742 * Assert sanity.
13743 */
13744 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
13745 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
13746#ifdef VBOX_STRICT
13747 if (RT_BYTE2(cBitsVarAndFlat) != 0)
13748 {
13749 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13750 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13751 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13752 Assert( pfnFunction
13753 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13754 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
13755 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
13756 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13757 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
13758 : UINT64_C(0xc000b000a0009000) ));
13759 }
13760 else
13761 Assert( pfnFunction
13762 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
13763 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
13764 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
13765 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
13766 : UINT64_C(0xc000b000a0009000) ));
13767#endif
13768
13769#ifdef VBOX_STRICT
13770 /*
13771 * Check that the fExec flags we've got make sense.
13772 */
13773 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13774#endif
13775
13776 /*
13777 * To keep things simple we have to commit any pending writes first as we
13778 * may end up making calls.
13779 */
13780 /** @todo we could postpone this till we make the call and reload the
13781 * registers after returning from the call. Not sure if that's sensible or
13782 * not, though. */
13783 off = iemNativeRegFlushPendingWrites(pReNative, off);
13784
13785 /*
13786 * First we calculate the new RSP and the effective stack pointer value.
13787 * For 64-bit mode and flat 32-bit these two are the same.
13788 * (Code structure is very similar to that of PUSH)
13789 */
13790 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
13791 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
13792 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
13793 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
13794 ? cbMem : sizeof(uint16_t);
13795 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
13796 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
13797 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
13798 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
13799 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
13800 if (cBitsFlat != 0)
13801 {
13802 Assert(idxRegEffSp == idxRegRsp);
13803 Assert(cBitsFlat == 32 || cBitsFlat == 64);
13804 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
13805 if (cBitsFlat == 64)
13806 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
13807 else
13808 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
13809 }
13810 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
13811 {
13812 Assert(idxRegEffSp != idxRegRsp);
13813 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
13814 kIemNativeGstRegUse_ReadOnly);
13815#ifdef RT_ARCH_AMD64
13816 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13817#else
13818 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13819#endif
13820 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
13821 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
13822 offFixupJumpToUseOtherBitSp = off;
13823 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13824 {
13825 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
13826 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13827 }
13828 else
13829 {
13830 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
13831 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13832 }
13833 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13834 }
13835 /* SpUpdateEnd: */
13836 uint32_t const offLabelSpUpdateEnd = off;
13837
13838 /*
13839 * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
13840 * straight to TlbMiss if we're skipping the lookup).
13841 */
13842 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
13843 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
13844 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13845 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
13846 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13847 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13848 : UINT32_MAX;
13849 uint8_t const idxRegValue = !TlbState.fSkip
13850 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13851 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
13852 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
13853 : UINT8_MAX;
13854 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13855
13856
13857 if (!TlbState.fSkip)
13858 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13859 else
13860 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
13861
13862 /*
13863 * Use16BitSp:
13864 */
13865 if (cBitsFlat == 0)
13866 {
13867#ifdef RT_ARCH_AMD64
13868 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13869#else
13870 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13871#endif
13872 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
13873 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13874 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13875 else
13876 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13877 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
13878 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13879 }
13880
13881 /*
13882 * TlbMiss:
13883 *
13884 * Call helper to do the pushing.
13885 */
13886 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
13887
13888#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13889 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13890#else
13891 RT_NOREF(idxInstr);
13892#endif
13893
13894 /* Save variables in volatile registers. */
13895 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13896 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
13897 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
13898 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
13899 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13900
13901 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
13902 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
13903 {
13904 /* Swap them using ARG0 as temp register: */
13905 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
13906 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
13907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
13908 }
13909 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
13910 {
13911 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
13912 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
13913 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13914
13915 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
13916 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
13917 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13918 }
13919 else
13920 {
13921 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
13922 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13923
13924 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
13925 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
13926 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
13927 }
13928
13929 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13930 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13931
13932 /* Done setting up parameters, make the call. */
13933 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13934
13935 /* Restore variables and guest shadow registers to volatile registers. */
13936 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13937 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13938
13939#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13940 if (!TlbState.fSkip)
13941 {
13942 /* end of TlbMiss - Jump to the done label. */
13943 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13944 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13945
13946 /*
13947 * TlbLookup:
13948 */
13949 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
13950 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13951
13952 /*
13953 * Emit code to do the actual storing / fetching.
13954 */
13955 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13956# ifdef VBOX_WITH_STATISTICS
13957 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13958 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
13959# endif
13960 if (idxRegValue != UINT8_MAX)
13961 {
13962 switch (cbMemAccess)
13963 {
13964 case 2:
13965 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13966 break;
13967 case 4:
13968 if (!fIsIntelSeg)
13969 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13970 else
13971 {
13972 /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
13973 PUSH FS in real mode, so we have to try to emulate that here.
13974 We borrow the now unused idxReg1 from the TLB lookup code here. */
13975 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
13976 kIemNativeGstReg_EFlags);
13977 if (idxRegEfl != UINT8_MAX)
13978 {
13979#ifdef RT_ARCH_AMD64
13980 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
13981 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
13982 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13983#else
13984 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
13985 off, TlbState.idxReg1, idxRegEfl,
13986 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13987#endif
13988 iemNativeRegFreeTmp(pReNative, idxRegEfl);
13989 }
13990 else
13991 {
13992 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
13993 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
13994 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
13995 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13996 }
13997 /* ASSUMES the upper half of idxRegValue is ZERO. */
13998 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
13999 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
14000 }
14001 break;
14002 case 8:
14003 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
14004 break;
14005 default:
14006 AssertFailed();
14007 }
14008 }
14009 else
14010 {
14011 switch (cbMemAccess)
14012 {
14013 case 2:
14014 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
14015 idxRegMemResult, TlbState.idxReg1);
14016 break;
14017 case 4:
14018 Assert(!fIsSegReg);
14019 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
14020 idxRegMemResult, TlbState.idxReg1);
14021 break;
14022 case 8:
14023 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
14024 break;
14025 default:
14026 AssertFailed();
14027 }
14028 }
14029
14030 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14031 TlbState.freeRegsAndReleaseVars(pReNative);
14032
14033 /*
14034 * TlbDone:
14035 *
14036 * Commit the new RSP value.
14037 */
14038 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14039 }
14040#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14041
14042 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
14043 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14044 if (idxRegEffSp != idxRegRsp)
14045 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14046
14047 /* The value variable is implicitly flushed. */
14048 if (idxRegValue != UINT8_MAX)
14049 iemNativeVarRegisterRelease(pReNative, idxVarValue);
14050 iemNativeVarFreeLocal(pReNative, idxVarValue);
14051
14052 return off;
14053}
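/*
 * Note, illustrative sketch of the code layout the push emitter above produces
 * for the non-skip (TLB lookup enabled) case, much simplified:
 *
 *         <update RSP / calculate EffSp>      ; SpUpdate (+ the alternate 16/32-bit SP block)
 *         jmp   TlbLookup
 *     TlbMiss:
 *         <save volatile regs, load pVCpu/EffSp/value into the call argument registers>
 *         call  pfnFunction                   ; e.g. iemNativeHlpStackStoreU32
 *         <restore regs and guest shadows>
 *         jmp   TlbDone
 *     TlbLookup:
 *         <inline data TLB lookup, resulting host address in idxRegMemResult>
 *         <store the value (or immediate) at [idxRegMemResult]>
 *     TlbDone:
 *         <commit idxRegRsp to pVCpu->cpum.GstCtx.rsp>
 */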
14054
14055
14056
14057/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
14058#define IEM_MC_POP_GREG_U16(a_iGReg) \
14059 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
14060 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
14061#define IEM_MC_POP_GREG_U32(a_iGReg) \
14062 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
14063 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
14064#define IEM_MC_POP_GREG_U64(a_iGReg) \
14065 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
14066 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
14067
14068#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
14069 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
14070 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14071#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
14072 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
14073 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
14074
14075#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
14076 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
14077 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14078#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
14079 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
14080 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
14081
14082
14083DECL_FORCE_INLINE_THROW(uint32_t)
14084iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
14085 uint8_t idxRegTmp)
14086{
14087 /* Use16BitSp: */
14088#ifdef RT_ARCH_AMD64
14089 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14090 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
14091 RT_NOREF(idxRegTmp);
14092#else
14093 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
14094 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
14095 /* add tmp, regrsp, #cbMem */
14096 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
14097 /* and tmp, tmp, #0xffff */
14098 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
14099 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
14100 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from idxRegTmp to RSP bits 15:0, keeping the other RSP bits as is. */
14101 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
14102#endif
14103 return off;
14104}
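/*
 * Note, worked example for the 16-bit SP pop above: unlike the push case the
 * effective address is the old SP, so EffSp is captured before SP is
 * incremented, and on arm64 the wrap-around add goes through a scratch
 * register so RSP bits 63:16 stay untouched.  Assuming RSP=0x0001FFFE and a
 * 4 byte pop:
 *
 *     regeff = 0xFFFE                             // SS offset to read from
 *     tmp    = (0xFFFE + 4) & 0xffff = 0x0002     // 16-bit wrap-around
 *     regrsp = 0x00010000 | 0x0002   = 0x00010002 // only bits 15:0 updated
 */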
14105
14106
14107DECL_FORCE_INLINE(uint32_t)
14108iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
14109{
14110 /* Use32BitSp: */
14111 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14112 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
14113 return off;
14114}
14115
14116
14117/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
14118DECL_INLINE_THROW(uint32_t)
14119iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
14120 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
14121{
14122 /*
14123 * Assert sanity.
14124 */
14125 Assert(idxGReg < 16);
14126#ifdef VBOX_STRICT
14127 if (RT_BYTE2(cBitsVarAndFlat) != 0)
14128 {
14129 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14130 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14131 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14132 Assert( pfnFunction
14133 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14134 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
14135 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14136 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
14137 : UINT64_C(0xc000b000a0009000) ));
14138 }
14139 else
14140 Assert( pfnFunction
14141 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
14142 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
14143 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
14144 : UINT64_C(0xc000b000a0009000) ));
14145#endif
14146
14147#ifdef VBOX_STRICT
14148 /*
14149 * Check that the fExec flags we've got make sense.
14150 */
14151 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14152#endif
14153
14154 /*
14155 * To keep things simple we have to commit any pending writes first as we
14156 * may end up making calls.
14157 */
14158 off = iemNativeRegFlushPendingWrites(pReNative, off);
14159
14160 /*
14161 * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
14162 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
14163 * directly as the effective stack pointer.
14164 * (Code structure is very similar to that of PUSH)
14165 */
14166 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
14167 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
14168 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
14169 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
14170 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
14171 /** @todo can do a better job picking the register here. For cbMem >= 4 this
14172 * will be the resulting register value. */
14173 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
14174
14175 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
14176 if (cBitsFlat != 0)
14177 {
14178 Assert(idxRegEffSp == idxRegRsp);
14179 Assert(cBitsFlat == 32 || cBitsFlat == 64);
14180 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
14181 }
14182 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
14183 {
14184 Assert(idxRegEffSp != idxRegRsp);
14185 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
14186 kIemNativeGstRegUse_ReadOnly);
14187#ifdef RT_ARCH_AMD64
14188 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14189#else
14190 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14191#endif
14192 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
14193 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
14194 offFixupJumpToUseOtherBitSp = off;
14195 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14196 {
14197/** @todo can skip idxRegRsp updating when popping ESP. */
14198 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
14199 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14200 }
14201 else
14202 {
14203 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
14204 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14205 }
14206 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14207 }
14208 /* SpUpdateEnd: */
14209 uint32_t const offLabelSpUpdateEnd = off;
14210
14211 /*
14212 * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
14213 * straight to TlbMiss if we're skipping the lookup).
14214 */
14215 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
14216 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
14217 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14218 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
14219 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14220 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14221 : UINT32_MAX;
14222
14223 if (!TlbState.fSkip)
14224 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14225 else
14226 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
14227
14228 /*
14229 * Use16BitSp:
14230 */
14231 if (cBitsFlat == 0)
14232 {
14233#ifdef RT_ARCH_AMD64
14234 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14235#else
14236 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14237#endif
14238 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
14239 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14240 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14241 else
14242 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14243 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
14244 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14245 }
14246
14247 /*
14248 * TlbMiss:
14249 *
14250 * Call helper to do the popping.
14251 */
14252 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
14253
14254#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14255 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14256#else
14257 RT_NOREF(idxInstr);
14258#endif
14259
14260 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
14261 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
14262 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
14263 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14264
14265
14266 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
14267 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
14268 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
14269
14270 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14271 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14272
14273 /* Done setting up parameters, make the call. */
14274 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14275
14276 /* Move the return register content to idxRegMemResult. */
14277 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14278 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14279
14280 /* Restore variables and guest shadow registers to volatile registers. */
14281 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14282 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14283
14284#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14285 if (!TlbState.fSkip)
14286 {
14287 /* end of TlbMiss - Jump to the done label. */
14288 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14289 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14290
14291 /*
14292 * TlbLookup:
14293 */
14294 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
14295 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14296
14297 /*
14298 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
14299 */
14300 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14301# ifdef VBOX_WITH_STATISTICS
14302 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
14303 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
14304# endif
14305 switch (cbMem)
14306 {
14307 case 2:
14308 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14309 break;
14310 case 4:
14311 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14312 break;
14313 case 8:
14314 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14315 break;
14316 default:
14317 AssertFailed();
14318 }
14319
14320 TlbState.freeRegsAndReleaseVars(pReNative);
14321
14322 /*
14323 * TlbDone:
14324 *
14325 * Set the new RSP value (FLAT accesses need to calculate it first) and
14326 * commit the popped register value.
14327 */
14328 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14329 }
14330#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14331
14332 if (idxGReg != X86_GREG_xSP)
14333 {
14334 /* Set the register. */
14335 if (cbMem >= sizeof(uint32_t))
14336 {
14337#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14338 AssertMsg( pReNative->idxCurCall == 0
14339 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
14340 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
14341#endif
14342 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
14343 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
14344 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14345 }
14346 else
14347 {
14348 Assert(cbMem == sizeof(uint16_t));
14349 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
14350 kIemNativeGstRegUse_ForUpdate);
14351 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
14352 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14353 iemNativeRegFreeTmp(pReNative, idxRegDst);
14354 }
14355
14356 /* Complete RSP calculation for FLAT mode. */
14357 if (idxRegEffSp == idxRegRsp)
14358 {
14359 if (cBitsFlat == 64)
14360 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14361 else
14362 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14363 }
14364 }
14365 else
14366 {
14367 /* We're popping RSP, ESP or SP. Only the 16-bit (SP) case needs a bit of extra work, of course. */
14368 if (cbMem == sizeof(uint64_t))
14369 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
14370 else if (cbMem == sizeof(uint32_t))
14371 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
14372 else
14373 {
14374 if (idxRegEffSp == idxRegRsp)
14375 {
14376 if (cBitsFlat == 64)
14377 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14378 else
14379 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14380 }
14381 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
14382 }
14383 }
14384 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
14385
14386 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14387 if (idxRegEffSp != idxRegRsp)
14388 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14389 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14390
14391 return off;
14392}
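/*
 * Note, worked example for the 16-bit pop case above: the popped word must
 * only replace bits 15:0 of the destination GPR (hypothetical values):
 *
 *     rax (before) = 0x1122334455667788
 *     popped word  =             0xBEEF
 *     rax (after)  = 0x112233445566BEEF
 *
 * which is what the iemNativeEmitGprMergeInGpr16() call implements, while the
 * 32 and 64-bit cases can simply take over idxRegMemResult as the new shadow
 * of the guest register.
 */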
14393
14394
14395
14396/*********************************************************************************************************************************
14397* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
14398*********************************************************************************************************************************/
14399
14400#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14401 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14402 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14403 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
14404
14405#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14406 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14407 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14408 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
14409
14410#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14411 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14412 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14413 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
14414
14415#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14416 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14417 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14418 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
14419
14420
14421#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14422 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14423 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14424 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
14425
14426#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14427 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14428 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14429 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
14430
14431#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14432 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14433 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14434 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14435
14436#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14437 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14438 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14439 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
14440
14441#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14442 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
14443 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14444 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14445
14446
14447#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14448 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14449 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14450 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
14451
14452#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14453 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14454 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14455 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
14456
14457#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14458 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14459 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14460 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14461
14462#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14463 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14464 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14465 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
14466
14467#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14468 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
14469 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14470 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14471
14472
14473#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14474 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14475 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14476 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
14477
14478#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14479 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14480 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14481 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
14482#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14483 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14484 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14485 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14486
14487#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14488 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14489 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14490 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
14491
14492#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14493 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
14494 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14495 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14496
14497
14498#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14499 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14500 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14501 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
14502
14503#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14504 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14505 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14506 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
14507
14508
14509#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14510 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14511 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14512 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
14513
14514#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14515 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14516 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14517 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
14518
14519#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14520 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14521 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14522 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
14523
14524#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14525 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14526 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14527 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
14528
14529
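/*
 * Note, illustrative sketch: the IEM_MC_MEM_MAP_* wrappers above hand back a
 * host pointer for direct access to the guest memory instead of copying the
 * data; the variants only differ in the access mode (ATOMIC/RW/WO/RO), i.e.
 * in the IEM_ACCESS_DATA_* flag and the TlbMiss helper passed to
 * iemNativeEmitMemMapCommon().  A hypothetical MC block (not from this file)
 * pairs the map with a commit+unmap, roughly:
 *
 *     ... declare pu32Dst and bUnmapInfo, calculate GCPtrEffDst ...
 *     IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, X86_SREG_DS, GCPtrEffDst);
 *     ... modify the value pointed to by pu32Dst in place ...
 *     IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */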
14530
14531#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14532 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14533 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14534 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
14535
14536#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14537 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14538 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14539 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
14540
14541#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14542 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14543 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14544 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
14545
14546#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14547 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14548 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14549 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
14550
14551
14552#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14553 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14554 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14555 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
14556
14557#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14558 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14559 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14560 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
14561
14562#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14563 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14564 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14565 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14566
14567#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14568 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14569 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14570 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
14571
14572#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
14573 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
14574 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14575 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14576
14577
14578#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14579 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14580 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14581 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
14582
14583#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14584 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14585 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14586 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
14587
14588#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14589 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14590 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14591 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14592
14593#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14594 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14595 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14596 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
14597
14598#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
14599 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
14600 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14601 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14602
14603
14604#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14605 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14606 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14607 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
14608
14609#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14610 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14611 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14612 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
14613
14614#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14615 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14616 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14617 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14618
14619#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14620 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14621 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14622 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
14623
14624#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
14625 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
14626 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14627 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14628
14629
14630#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
14631 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14632 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14633 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
14634
14635#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
14636 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14637 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14638 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
14639
14640
14641#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14642 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14643 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14644 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
14645
14646#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14647 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14648 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14649 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
14650
14651#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14652 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14653 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14654 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
14655
14656#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14657 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14658 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14659 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
14660
14661
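/**
 * Common emitter worker for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * statements above; the flat variants pass UINT8_MAX as the segment register.
 *
 * Rough sketch of what each wrapper expands to, lifted from the
 * IEM_MC_MEM_MAP_U64_RW define above (fAlignMask is typically the natural
 * alignment, i.e. sizeof(type) - 1):
 * @code
 *  off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t),
 *                                  IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1,
 *                                  (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr);
 * @endcode
 *
 * @note    Throws/longjmps on failure.
 */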
14662DECL_INLINE_THROW(uint32_t)
14663iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
14664 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
14665 uintptr_t pfnFunction, uint8_t idxInstr)
14666{
14667 /*
14668 * Assert sanity.
14669 */
14670 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
14671 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
14672 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
14673 && pVarMem->cbVar == sizeof(void *),
14674 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14675
14676 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14677 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14678 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
14679 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
14680 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14681
14682 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
14683 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
14684 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
14685 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
14686 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14687
14688 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
14689
14690 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
14691
14692#ifdef VBOX_STRICT
14693# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
14694 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
14695 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
14696 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
14697 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
14698# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
14699 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
14700 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
14701 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
14702
14703 if (iSegReg == UINT8_MAX)
14704 {
14705 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14706 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14707 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14708 switch (cbMem)
14709 {
14710 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
14711 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
14712 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
14713 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
14714 case 10:
14715 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
14716 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
14717 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14718 break;
14719 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
14720# if 0
14721 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
14722 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
14723# endif
14724 default: AssertFailed(); break;
14725 }
14726 }
14727 else
14728 {
14729 Assert(iSegReg < 6);
14730 switch (cbMem)
14731 {
14732 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
14733 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
14734 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
14735 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
14736 case 10:
14737 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
14738 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
14739 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14740 break;
14741 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
14742# if 0
14743 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
14744 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
14745# endif
14746 default: AssertFailed(); break;
14747 }
14748 }
14749# undef IEM_MAP_HLP_FN
14750# undef IEM_MAP_HLP_FN_NO_AT
14751#endif
14752
14753#ifdef VBOX_STRICT
14754 /*
14755 * Check that the fExec flags we've got make sense.
14756 */
14757 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14758#endif
14759
14760 /*
14761 * To keep things simple we have to commit any pending writes first as we
14762 * may end up making calls.
14763 */
14764 off = iemNativeRegFlushPendingWrites(pReNative, off);
14765
14766#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14767 /*
14768 * Move/spill/flush stuff out of call-volatile registers.
14769 * This is the easy way out. We could contain this to the tlb-miss branch
14770 * by saving and restoring active stuff here.
14771 */
14772 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
14773 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
14774#endif
14775
14776    /* The bUnmapInfo variable will get a register in the tlb-hit code path,
14777       while the tlb-miss code path will temporarily put it on the stack.
14778       Set the type to stack here so we don't need to do it twice below. */
14779 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
14780 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
14781 /** @todo use a tmp register from TlbState, since they'll be free after tlb
14782 * lookup is done. */
14783
14784 /*
14785 * Define labels and allocate the result register (trying for the return
14786 * register if we can).
14787 */
14788 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14789 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
14790 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
14791 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
14792 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
14793 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14794 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14795 : UINT32_MAX;
14796//off=iemNativeEmitBrk(pReNative, off, 0);
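    /* Rough sketch of the code emitted below when the TLB lookup isn't skipped
       (labels carry the uTlbSeqNo suffix):
            jmp     TlbLookup
        TlbMiss:
            <save volatile regs, load args, call pfnFunction>
            <load bUnmapInfo from its stack slot>
            jmp     TlbDone
        TlbLookup:
            <inline TLB lookup, branching to TlbMiss on miss>
            <bUnmapInfo = 0>
        TlbDone:
    */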
14797 /*
14798 * Jump to the TLB lookup code.
14799 */
14800 if (!TlbState.fSkip)
14801 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14802
14803 /*
14804 * TlbMiss:
14805 *
14806 * Call helper to do the fetching.
14807 * We flush all guest register shadow copies here.
14808 */
14809 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
14810
14811#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14812 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14813#else
14814 RT_NOREF(idxInstr);
14815#endif
14816
14817#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14818 /* Save variables in volatile registers. */
14819 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
14820 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14821#endif
14822
14823 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
14824 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
14825#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14826 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
14827#else
14828 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14829#endif
14830
14831 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
14832 if (iSegReg != UINT8_MAX)
14833 {
14834 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
14835 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
14836 }
14837
14838 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
14839 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
14840 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
14841
14842 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14843 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14844
14845 /* Done setting up parameters, make the call. */
14846 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14847
14848 /*
14849 * Put the output in the right registers.
14850 */
14851 Assert(idxRegMemResult == pVarMem->idxReg);
14852 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14853 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14854
14855#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14856 /* Restore variables and guest shadow registers to volatile registers. */
14857 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14858 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14859#endif
14860
14861 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
14862 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
14863
14864#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14865 if (!TlbState.fSkip)
14866 {
14867        /* end of tlb-miss - Jump to the done label. */
14868 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14869 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14870
14871 /*
14872 * TlbLookup:
14873 */
14874 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
14875 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14876# ifdef VBOX_WITH_STATISTICS
14877 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
14878 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
14879# endif
14880
14881 /* [idxVarUnmapInfo] = 0; */
14882 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
14883
14884 /*
14885 * TlbDone:
14886 */
14887 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14888
14889 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
14890
14891# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14892 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
14893 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14894# endif
14895 }
14896#else
14897 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
14898#endif
14899
14900 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
14901 iemNativeVarRegisterRelease(pReNative, idxVarMem);
14902
14903 return off;
14904}
14905
14906
14907#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
14908 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
14909 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
14910
14911#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
14912 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
14913 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
14914
14915#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
14916 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
14917 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
14918
14919#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
14920 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
14921 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
14922
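/**
 * Common emitter worker for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statements above.
 *
 * Rough sketch of the generated code; the helper call is skipped whenever the
 * mapping left bUnmapInfo zero, i.e. no special unmapping is needed (see the
 * comments further down):
 * @code
 *      if (bUnmapInfo != 0)
 *          pfnFunction(pVCpu, bUnmapInfo); // e.g. iemNativeHlpMemCommitAndUnmapRw
 * @endcode
 */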
14923DECL_INLINE_THROW(uint32_t)
14924iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
14925 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
14926{
14927 /*
14928 * Assert sanity.
14929 */
14930 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14931#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
14932 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14933#endif
14934 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
14935 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
14936 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
14937#ifdef VBOX_STRICT
14938 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
14939 {
14940 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
14941 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
14942 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
14943 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
14944 case IEM_ACCESS_TYPE_WRITE:
14945 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
14946 case IEM_ACCESS_TYPE_READ:
14947 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
14948 default: AssertFailed();
14949 }
14950#else
14951 RT_NOREF(fAccess);
14952#endif
14953
14954 /*
14955 * To keep things simple we have to commit any pending writes first as we
14956 * may end up making calls (there shouldn't be any at this point, so this
14957 * is just for consistency).
14958 */
14959 /** @todo we could postpone this till we make the call and reload the
14960 * registers after returning from the call. Not sure if that's sensible or
14961 * not, though. */
14962 off = iemNativeRegFlushPendingWrites(pReNative, off);
14963
14964 /*
14965 * Move/spill/flush stuff out of call-volatile registers.
14966 *
14967 * We exclude any register holding the bUnmapInfo variable, as we'll be
14968 * checking it after returning from the call and will free it afterwards.
14969 */
14970 /** @todo save+restore active registers and maybe guest shadows in miss
14971 * scenario. */
14972 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
14973
14974 /*
14975 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
14976 * to call the unmap helper function.
14977 *
14978     * The likelihood of it being zero is higher than that of a TLB hit when doing
14979     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
14980     * access should also end up with a mapping that won't need special unmapping.
14981 */
14982 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
14983 * should speed up things for the pure interpreter as well when TLBs
14984 * are enabled. */
14985#ifdef RT_ARCH_AMD64
14986 if (pVarUnmapInfo->idxReg == UINT8_MAX)
14987 {
14988 /* test byte [rbp - xxx], 0ffh */
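        /* Encoding: 0xF6 /0 ib = TEST r/m8, imm8; the ModRM byte (reg=0) and the
           RBP-relative displacement come from iemNativeEmitGprByBpDisp below,
           followed by the 0xFF immediate. */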
14989 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
14990 pbCodeBuf[off++] = 0xf6;
14991 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
14992 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
14993 pbCodeBuf[off++] = 0xff;
14994 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14995 }
14996 else
14997#endif
14998 {
14999 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
15000 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
15001 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
15002 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
15003 }
15004 uint32_t const offJmpFixup = off;
15005    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
15006
15007 /*
15008 * Call the unmap helper function.
15009 */
15010#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
15011 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
15012#else
15013 RT_NOREF(idxInstr);
15014#endif
15015
15016 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
15017 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
15018 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
15019
15020 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
15021 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
15022
15023 /* Done setting up parameters, make the call. */
15024 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
15025
15026    /* The bUnmapInfo variable is implicitly freed by these MCs. */
15027 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
15028
15029 /*
15030 * Done, just fixup the jump for the non-call case.
15031 */
15032 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
15033
15034 return off;
15035}
15036
15037
15038
15039/*********************************************************************************************************************************
15040* State and Exceptions *
15041*********************************************************************************************************************************/
15042
15043#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15044#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15045
15046#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15047#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15048#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15049
15050#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15051#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15052#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15053
15054
15055DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
15056{
15057 /** @todo this needs a lot more work later. */
15058 RT_NOREF(pReNative, fForChange);
15059 return off;
15060}
15061
15062
15063
15064/*********************************************************************************************************************************
15065* Emitters for FPU related operations. *
15066*********************************************************************************************************************************/
15067
15068#define IEM_MC_FETCH_FCW(a_u16Fcw) \
15069 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
15070
15071/** Emits code for IEM_MC_FETCH_FCW. */
15072DECL_INLINE_THROW(uint32_t)
15073iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15074{
15075 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15076 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15077
15078 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15079
15080 /* Allocate a temporary FCW register. */
15081 /** @todo eliminate extra register */
15082 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
15083 kIemNativeGstRegUse_ReadOnly);
15084
15085 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
15086
15087 /* Free but don't flush the FCW register. */
15088 iemNativeRegFreeTmp(pReNative, idxFcwReg);
15089 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15090
15091 return off;
15092}
15093
15094
15095#define IEM_MC_FETCH_FSW(a_u16Fsw) \
15096 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
15097
15098/** Emits code for IEM_MC_FETCH_FSW. */
15099DECL_INLINE_THROW(uint32_t)
15100iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15101{
15102 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15103 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15104
15105 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
15106 /* Allocate a temporary FSW register. */
15107 /** @todo eliminate extra register */
15108 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
15109 kIemNativeGstRegUse_ReadOnly);
15110
15111 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
15112
15113 /* Free but don't flush the FSW register. */
15114 iemNativeRegFreeTmp(pReNative, idxFswReg);
15115 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15116
15117 return off;
15118}
15119
15120
15121
15122#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15123
15124
15125/*********************************************************************************************************************************
15126* Emitters for SSE/AVX specific operations. *
15127*********************************************************************************************************************************/
15128
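/*
 * Note: the emitters in this section share a common pattern - allocate host
 *       register copies of the guest SIMD register(s) involved (destinations
 *       for full write, sources read-only), emit the operation, mark the
 *       destination dirty so it gets written back on the next flush, and
 *       free the temporary registers again.
 */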
15129#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
15130 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
15131
15132/** Emits code for IEM_MC_COPY_XREG_U128. */
15133DECL_INLINE_THROW(uint32_t)
15134iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
15135{
15136 /* Allocate destination and source register. */
15137 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
15138 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
15139 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
15140 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15141
15142 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
15143 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
15144 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
15145
15146 /* Free but don't flush the source and destination register. */
15147 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
15148 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15149
15150 return off;
15151}
15152
15153
15154#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
15155 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
15156
15157/** Emits code for IEM_MC_FETCH_XREG_U64. */
15158DECL_INLINE_THROW(uint32_t)
15159iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
15160{
15161 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15162 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
15163
15164 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
15165 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15166
15167 iemNativeVarSetKindToStack(pReNative, idxDstVar);
15168 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15169
15170 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
15171
15172 /* Free but don't flush the source register. */
15173 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15174 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15175
15176 return off;
15177}
15178
15179
15180#define IEM_MC_FETCH_XREG_U32(a_u64Value, a_iXReg, a_iDWord) \
15181 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u64Value, a_iXReg, a_iDWord)
15182
15183/** Emits code for IEM_MC_FETCH_XREG_U32. */
15184DECL_INLINE_THROW(uint32_t)
15185iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
15186{
15187 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15188 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
15189
15190 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
15191 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15192
15193 iemNativeVarSetKindToStack(pReNative, idxDstVar);
15194 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15195
15196 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
15197
15198 /* Free but don't flush the source register. */
15199 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15200 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15201
15202 return off;
15203}
15204
15205
15206#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
15207 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
15208
15209/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
15210DECL_INLINE_THROW(uint32_t)
15211iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
15212{
15213 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
15214 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
15215
15216 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
15217 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
15218
15219 /* Free but don't flush the register. */
15220 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
15221
15222 return off;
15223}
15224
15225#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
15226
15227
15228/*********************************************************************************************************************************
15229* The native code generator functions for each MC block. *
15230*********************************************************************************************************************************/
15231
15232/*
15233 * Include instruction emitters.
15234 */
15235#include "target-x86/IEMAllN8veEmit-x86.h"
15236
15237/*
15238 * Include g_apfnIemNativeRecompileFunctions and associated functions.
15239 *
15240 * This should probably live in its own file later, but let's see what the
15241 * compile times turn out to be first.
15242 */
15243#include "IEMNativeFunctions.cpp.h"
15244
15245
15246
15247/*********************************************************************************************************************************
15248* Recompiler Core. *
15249*********************************************************************************************************************************/
15250
15251
15252/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
15253static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
15254{
15255 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
15256 pDis->cbCachedInstr += cbMaxRead;
15257 RT_NOREF(cbMinRead);
15258 return VERR_NO_DATA;
15259}
15260
15261
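/**
 * Translates an offset into VMCPU into the name of the member starting at (or
 * containing) that offset, for annotating VMCPU-relative memory accesses when
 * disassembling native TBs.
 *
 * @returns Member name, or NULL if the offset doesn't match anything known.
 * @param   off     The byte offset into VMCPUCC.
 *
 * @note    The lookup table below must be kept sorted by offset for the binary
 *          search to work; strict builds verify this once at runtime.
 *
 * Minimal usage sketch (hypothetical caller):
 * @code
 *      const char *pszName = iemNativeDbgVCpuOffsetToName(RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip));
 * @endcode
 */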
15262DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
15263{
15264 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
15265 {
15266#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
15267 ENTRY(fLocalForcedActions),
15268 ENTRY(iem.s.rcPassUp),
15269 ENTRY(iem.s.fExec),
15270 ENTRY(iem.s.pbInstrBuf),
15271 ENTRY(iem.s.uInstrBufPc),
15272 ENTRY(iem.s.GCPhysInstrBuf),
15273 ENTRY(iem.s.cbInstrBufTotal),
15274 ENTRY(iem.s.idxTbCurInstr),
15275#ifdef VBOX_WITH_STATISTICS
15276 ENTRY(iem.s.StatNativeTlbHitsForFetch),
15277 ENTRY(iem.s.StatNativeTlbHitsForStore),
15278 ENTRY(iem.s.StatNativeTlbHitsForStack),
15279 ENTRY(iem.s.StatNativeTlbHitsForMapped),
15280 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
15281 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
15282 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
15283 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
15284#endif
15285 ENTRY(iem.s.DataTlb.aEntries),
15286 ENTRY(iem.s.DataTlb.uTlbRevision),
15287 ENTRY(iem.s.DataTlb.uTlbPhysRev),
15288 ENTRY(iem.s.DataTlb.cTlbHits),
15289 ENTRY(iem.s.CodeTlb.aEntries),
15290 ENTRY(iem.s.CodeTlb.uTlbRevision),
15291 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
15292 ENTRY(iem.s.CodeTlb.cTlbHits),
15293 ENTRY(pVMR3),
15294 ENTRY(cpum.GstCtx.rax),
15295 ENTRY(cpum.GstCtx.ah),
15296 ENTRY(cpum.GstCtx.rcx),
15297 ENTRY(cpum.GstCtx.ch),
15298 ENTRY(cpum.GstCtx.rdx),
15299 ENTRY(cpum.GstCtx.dh),
15300 ENTRY(cpum.GstCtx.rbx),
15301 ENTRY(cpum.GstCtx.bh),
15302 ENTRY(cpum.GstCtx.rsp),
15303 ENTRY(cpum.GstCtx.rbp),
15304 ENTRY(cpum.GstCtx.rsi),
15305 ENTRY(cpum.GstCtx.rdi),
15306 ENTRY(cpum.GstCtx.r8),
15307 ENTRY(cpum.GstCtx.r9),
15308 ENTRY(cpum.GstCtx.r10),
15309 ENTRY(cpum.GstCtx.r11),
15310 ENTRY(cpum.GstCtx.r12),
15311 ENTRY(cpum.GstCtx.r13),
15312 ENTRY(cpum.GstCtx.r14),
15313 ENTRY(cpum.GstCtx.r15),
15314 ENTRY(cpum.GstCtx.es.Sel),
15315 ENTRY(cpum.GstCtx.es.u64Base),
15316 ENTRY(cpum.GstCtx.es.u32Limit),
15317 ENTRY(cpum.GstCtx.es.Attr),
15318 ENTRY(cpum.GstCtx.cs.Sel),
15319 ENTRY(cpum.GstCtx.cs.u64Base),
15320 ENTRY(cpum.GstCtx.cs.u32Limit),
15321 ENTRY(cpum.GstCtx.cs.Attr),
15322 ENTRY(cpum.GstCtx.ss.Sel),
15323 ENTRY(cpum.GstCtx.ss.u64Base),
15324 ENTRY(cpum.GstCtx.ss.u32Limit),
15325 ENTRY(cpum.GstCtx.ss.Attr),
15326 ENTRY(cpum.GstCtx.ds.Sel),
15327 ENTRY(cpum.GstCtx.ds.u64Base),
15328 ENTRY(cpum.GstCtx.ds.u32Limit),
15329 ENTRY(cpum.GstCtx.ds.Attr),
15330 ENTRY(cpum.GstCtx.fs.Sel),
15331 ENTRY(cpum.GstCtx.fs.u64Base),
15332 ENTRY(cpum.GstCtx.fs.u32Limit),
15333 ENTRY(cpum.GstCtx.fs.Attr),
15334 ENTRY(cpum.GstCtx.gs.Sel),
15335 ENTRY(cpum.GstCtx.gs.u64Base),
15336 ENTRY(cpum.GstCtx.gs.u32Limit),
15337 ENTRY(cpum.GstCtx.gs.Attr),
15338 ENTRY(cpum.GstCtx.rip),
15339 ENTRY(cpum.GstCtx.eflags),
15340 ENTRY(cpum.GstCtx.uRipInhibitInt),
15341#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15342 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
15343 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
15344 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
15345 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
15346 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
15347 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
15348 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
15349 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
15350 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
15351 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
15352 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
15353 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
15354 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
15355 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
15356 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
15357 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
15358 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
15359 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
15360 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
15361 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
15362 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
15363 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
15364 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
15365 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
15366 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
15367 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
15368 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
15369 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
15370 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
15371 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
15372 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
15373 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
15374#endif
15375#undef ENTRY
15376 };
15377#ifdef VBOX_STRICT
15378 static bool s_fOrderChecked = false;
15379 if (!s_fOrderChecked)
15380 {
15381 s_fOrderChecked = true;
15382 uint32_t offPrev = s_aMembers[0].off;
15383 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
15384 {
15385 Assert(s_aMembers[i].off > offPrev);
15386 offPrev = s_aMembers[i].off;
15387 }
15388 }
15389#endif
15390
15391 /*
15392 * Binary lookup.
15393 */
15394 unsigned iStart = 0;
15395 unsigned iEnd = RT_ELEMENTS(s_aMembers);
15396 for (;;)
15397 {
15398 unsigned const iCur = iStart + (iEnd - iStart) / 2;
15399 uint32_t const offCur = s_aMembers[iCur].off;
15400 if (off < offCur)
15401 {
15402 if (iCur != iStart)
15403 iEnd = iCur;
15404 else
15405 break;
15406 }
15407 else if (off > offCur)
15408 {
15409 if (iCur + 1 < iEnd)
15410 iStart = iCur + 1;
15411 else
15412 break;
15413 }
15414 else
15415 return s_aMembers[iCur].pszName;
15416 }
15417#ifdef VBOX_WITH_STATISTICS
15418 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
15419 return "iem.s.acThreadedFuncStats[iFn]";
15420#endif
15421 return NULL;
15422}
15423
15424
15425/**
15426 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
15427 * @returns pszBuf.
15428 * @param fFlags The flags.
15429 * @param pszBuf The output buffer.
15430 * @param cbBuf The output buffer size. At least 32 bytes.
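 *
 * Typical usage, as in iemNativeDisassembleTb below:
 * @code
 *      char szBuf[512];
 *      pHlp->pfnPrintf(pHlp, "fFlags=%#010x %s\n", pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf)));
 * @endcode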
15431 */
15432DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
15433{
15434 Assert(cbBuf >= 32);
15435 static RTSTRTUPLE const s_aModes[] =
15436 {
15437 /* [00] = */ { RT_STR_TUPLE("16BIT") },
15438 /* [01] = */ { RT_STR_TUPLE("32BIT") },
15439 /* [02] = */ { RT_STR_TUPLE("!2!") },
15440 /* [03] = */ { RT_STR_TUPLE("!3!") },
15441 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
15442 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
15443 /* [06] = */ { RT_STR_TUPLE("!6!") },
15444 /* [07] = */ { RT_STR_TUPLE("!7!") },
15445 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
15446 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
15447 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
15448 /* [0b] = */ { RT_STR_TUPLE("!b!") },
15449 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
15450 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
15451 /* [0e] = */ { RT_STR_TUPLE("!e!") },
15452 /* [0f] = */ { RT_STR_TUPLE("!f!") },
15453 /* [10] = */ { RT_STR_TUPLE("!10!") },
15454 /* [11] = */ { RT_STR_TUPLE("!11!") },
15455 /* [12] = */ { RT_STR_TUPLE("!12!") },
15456 /* [13] = */ { RT_STR_TUPLE("!13!") },
15457 /* [14] = */ { RT_STR_TUPLE("!14!") },
15458 /* [15] = */ { RT_STR_TUPLE("!15!") },
15459 /* [16] = */ { RT_STR_TUPLE("!16!") },
15460 /* [17] = */ { RT_STR_TUPLE("!17!") },
15461 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
15462 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
15463 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
15464 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
15465 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
15466 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
15467 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
15468 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
15469 };
15470 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
15471 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
15472 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
15473
15474 pszBuf[off++] = ' ';
15475 pszBuf[off++] = 'C';
15476 pszBuf[off++] = 'P';
15477 pszBuf[off++] = 'L';
15478 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
15479 Assert(off < 32);
15480
15481 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
15482
15483 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
15484 {
15485 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
15486 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
15487 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
15488 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
15489 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
15490 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
15491 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
15492 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
15493 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
15494 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
15495 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
15496 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
15497 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
15498 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
15499 };
15500 if (fFlags)
15501 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
15502 if (s_aFlags[i].fFlag & fFlags)
15503 {
15504 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
15505 pszBuf[off++] = ' ';
15506 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
15507 off += s_aFlags[i].cchName;
15508 fFlags &= ~s_aFlags[i].fFlag;
15509 if (!fFlags)
15510 break;
15511 }
15512 pszBuf[off] = '\0';
15513
15514 return pszBuf;
15515}
15516
15517
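/**
 * Disassembles a native translation block, writing the output via the given
 * debug info helper.
 *
 * Minimal usage sketch; the choice of info helper is an assumption here (e.g.
 * the release log helper returned by DBGFR3InfoLogRelHlp()):
 * @code
 *      iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());
 * @endcode
 */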
15518DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
15519{
15520 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
15521#if defined(RT_ARCH_AMD64)
15522 static const char * const a_apszMarkers[] =
15523 {
15524 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
15525 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
15526 };
15527#endif
15528
15529 char szDisBuf[512];
15530 DISSTATE Dis;
15531 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
15532 uint32_t const cNative = pTb->Native.cInstructions;
15533 uint32_t offNative = 0;
15534#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15535 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
15536#endif
15537 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15538 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15539 : DISCPUMODE_64BIT;
15540#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15541 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
15542#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15543 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
15544#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15545# error "Port me"
15546#else
15547 csh hDisasm = ~(size_t)0;
15548# if defined(RT_ARCH_AMD64)
15549 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
15550# elif defined(RT_ARCH_ARM64)
15551 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
15552# else
15553# error "Port me"
15554# endif
15555 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
15556
15557 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
15558 //Assert(rcCs == CS_ERR_OK);
15559#endif
15560
15561 /*
15562 * Print TB info.
15563 */
15564 pHlp->pfnPrintf(pHlp,
15565 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
15566 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
15567 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
15568 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
15569#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15570 if (pDbgInfo && pDbgInfo->cEntries > 1)
15571 {
15572 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
15573
15574 /*
15575         * This disassembly is driven by the debug info which follows the native
15576         * code and indicates where the next guest instruction starts, where
15577         * labels are, and other such things.
15578 */
15579 uint32_t idxThreadedCall = 0;
15580 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
15581 uint8_t idxRange = UINT8_MAX;
15582 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
15583 uint32_t offRange = 0;
15584 uint32_t offOpcodes = 0;
15585 uint32_t const cbOpcodes = pTb->cbOpcodes;
15586 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
15587 uint32_t const cDbgEntries = pDbgInfo->cEntries;
15588 uint32_t iDbgEntry = 1;
15589 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
15590
15591 while (offNative < cNative)
15592 {
15593 /* If we're at or have passed the point where the next chunk of debug
15594 info starts, process it. */
15595 if (offDbgNativeNext <= offNative)
15596 {
15597 offDbgNativeNext = UINT32_MAX;
15598 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
15599 {
15600 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
15601 {
15602 case kIemTbDbgEntryType_GuestInstruction:
15603 {
15604 /* Did the exec flag change? */
15605 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
15606 {
15607 pHlp->pfnPrintf(pHlp,
15608 " fExec change %#08x -> %#08x %s\n",
15609 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15610 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15611 szDisBuf, sizeof(szDisBuf)));
15612 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
15613 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15614 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15615 : DISCPUMODE_64BIT;
15616 }
15617
15618                         /* New opcode range? We need to fend off a spurious debug info entry here for cases
15619 where the compilation was aborted before the opcode was recorded and the actual
15620 instruction was translated to a threaded call. This may happen when we run out
15621 of ranges, or when some complicated interrupts/FFs are found to be pending or
15622 similar. So, we just deal with it here rather than in the compiler code as it
15623 is a lot simpler to do here. */
15624 if ( idxRange == UINT8_MAX
15625 || idxRange >= cRanges
15626 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
15627 {
15628 idxRange += 1;
15629 if (idxRange < cRanges)
15630 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
15631 else
15632 continue;
15633 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
15634 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
15635 + (pTb->aRanges[idxRange].idxPhysPage == 0
15636 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15637 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
15638 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15639 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
15640 pTb->aRanges[idxRange].idxPhysPage);
15641 GCPhysPc += offRange;
15642 }
15643
15644 /* Disassemble the instruction. */
15645 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
15646 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
15647 uint32_t cbInstr = 1;
15648 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15649 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
15650 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15651 if (RT_SUCCESS(rc))
15652 {
15653 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15654 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15655 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15656 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15657
15658 static unsigned const s_offMarker = 55;
15659 static char const s_szMarker[] = " ; <--- guest";
15660 if (cch < s_offMarker)
15661 {
15662 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
15663 cch = s_offMarker;
15664 }
15665 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
15666 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
15667
15668 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
15669 }
15670 else
15671 {
15672 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
15673 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
15674 cbInstr = 1;
15675 }
15676 GCPhysPc += cbInstr;
15677 offOpcodes += cbInstr;
15678 offRange += cbInstr;
15679 continue;
15680 }
15681
15682 case kIemTbDbgEntryType_ThreadedCall:
15683 pHlp->pfnPrintf(pHlp,
15684 " Call #%u to %s (%u args) - %s\n",
15685 idxThreadedCall,
15686 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15687 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15688 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
15689 idxThreadedCall++;
15690 continue;
15691
15692 case kIemTbDbgEntryType_GuestRegShadowing:
15693 {
15694 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15695 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
15696 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
15697 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
15698 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15699 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
15700 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
15701 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
15702 else
15703 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
15704 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
15705 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15706 continue;
15707 }
15708
15709#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15710 case kIemTbDbgEntryType_GuestSimdRegShadowing:
15711 {
15712 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15713 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
15714 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
15715 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
15716 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15717 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
15718 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
15719 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
15720 else
15721 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
15722 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
15723 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15724 continue;
15725 }
15726#endif
15727
15728 case kIemTbDbgEntryType_Label:
15729 {
15730 const char *pszName = "what_the_fudge";
15731 const char *pszComment = "";
15732 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
15733 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
15734 {
15735 case kIemNativeLabelType_Return:
15736 pszName = "Return";
15737 break;
15738 case kIemNativeLabelType_ReturnBreak:
15739 pszName = "ReturnBreak";
15740 break;
15741 case kIemNativeLabelType_ReturnWithFlags:
15742 pszName = "ReturnWithFlags";
15743 break;
15744 case kIemNativeLabelType_NonZeroRetOrPassUp:
15745 pszName = "NonZeroRetOrPassUp";
15746 break;
15747 case kIemNativeLabelType_RaiseGp0:
15748 pszName = "RaiseGp0";
15749 break;
15750 case kIemNativeLabelType_RaiseNm:
15751 pszName = "RaiseNm";
15752 break;
15753 case kIemNativeLabelType_RaiseUd:
15754 pszName = "RaiseUd";
15755 break;
15756 case kIemNativeLabelType_RaiseMf:
15757 pszName = "RaiseMf";
15758 break;
15759 case kIemNativeLabelType_RaiseXf:
15760 pszName = "RaiseXf";
15761 break;
15762 case kIemNativeLabelType_ObsoleteTb:
15763 pszName = "ObsoleteTb";
15764 break;
15765 case kIemNativeLabelType_NeedCsLimChecking:
15766 pszName = "NeedCsLimChecking";
15767 break;
15768 case kIemNativeLabelType_CheckBranchMiss:
15769 pszName = "CheckBranchMiss";
15770 break;
15771 case kIemNativeLabelType_If:
15772 pszName = "If";
15773 fNumbered = true;
15774 break;
15775 case kIemNativeLabelType_Else:
15776 pszName = "Else";
15777 fNumbered = true;
15778 pszComment = " ; regs state restored pre-if-block";
15779 break;
15780 case kIemNativeLabelType_Endif:
15781 pszName = "Endif";
15782 fNumbered = true;
15783 break;
15784 case kIemNativeLabelType_CheckIrq:
15785 pszName = "CheckIrq_CheckVM";
15786 fNumbered = true;
15787 break;
15788 case kIemNativeLabelType_TlbLookup:
15789 pszName = "TlbLookup";
15790 fNumbered = true;
15791 break;
15792 case kIemNativeLabelType_TlbMiss:
15793 pszName = "TlbMiss";
15794 fNumbered = true;
15795 break;
15796 case kIemNativeLabelType_TlbDone:
15797 pszName = "TlbDone";
15798 fNumbered = true;
15799 break;
15800 case kIemNativeLabelType_Invalid:
15801 case kIemNativeLabelType_End:
15802 break;
15803 }
15804 if (fNumbered)
15805 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
15806 else
15807 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
15808 continue;
15809 }
15810
15811 case kIemTbDbgEntryType_NativeOffset:
15812 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
15813 Assert(offDbgNativeNext > offNative);
15814 break;
15815
15816#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
15817 case kIemTbDbgEntryType_DelayedPcUpdate:
15818 pHlp->pfnPrintf(pHlp,
15819 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
15820 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
15821 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
15822 continue;
15823#endif
15824
15825 default:
15826 AssertFailed();
15827 }
15828 iDbgEntry++;
15829 break;
15830 }
15831 }
15832
15833 /*
15834 * Disassemble the next native instruction.
15835 */
15836 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15837# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15838 uint32_t cbInstr = sizeof(paNative[0]);
15839 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
15840 if (RT_SUCCESS(rc))
15841 {
15842# if defined(RT_ARCH_AMD64)
15843 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
15844 {
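                          /* The marker NOP carries a 32-bit payload in its last four bytes:
                             low word = call index (bit 15 set when recompiled), high word = threaded function number. */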
15845 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
15846 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
15847 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
15848 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
15849 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
15850 uInfo & 0x8000 ? "recompiled" : "todo");
15851 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
15852 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
15853 else
15854 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
15855 }
15856 else
15857# endif
15858 {
15859 const char *pszAnnotation = NULL;
15860# ifdef RT_ARCH_AMD64
15861 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15862 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15863 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15864 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
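                          /* If an operand addresses memory via the fixed pVMCPU register, translate the
                             displacement into a VMCPU field name for the annotation. */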
15865 PCDISOPPARAM pMemOp;
15866 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
15867 pMemOp = &Dis.Param1;
15868 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
15869 pMemOp = &Dis.Param2;
15870 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
15871 pMemOp = &Dis.Param3;
15872 else
15873 pMemOp = NULL;
15874 if ( pMemOp
15875 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
15876 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
15877 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
15878 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
15879
15880#elif defined(RT_ARCH_ARM64)
15881 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
15882 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15883 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15884# else
15885# error "Port me"
15886# endif
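                          /* Pad the disassembly out to a fixed column and append the annotation as a comment, if it fits. */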
15887 if (pszAnnotation)
15888 {
15889 static unsigned const s_offAnnotation = 55;
15890 size_t const cchAnnotation = strlen(pszAnnotation);
15891 size_t cchDis = strlen(szDisBuf);
15892 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
15893 {
15894 if (cchDis < s_offAnnotation)
15895 {
15896 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
15897 cchDis = s_offAnnotation;
15898 }
15899 szDisBuf[cchDis++] = ' ';
15900 szDisBuf[cchDis++] = ';';
15901 szDisBuf[cchDis++] = ' ';
15902 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
15903 }
15904 }
15905 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
15906 }
15907 }
15908 else
15909 {
15910# if defined(RT_ARCH_AMD64)
15911 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
15912 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
15913# elif defined(RT_ARCH_ARM64)
15914 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
15915# else
15916# error "Port me"
15917# endif
15918 cbInstr = sizeof(paNative[0]);
15919 }
15920 offNative += cbInstr / sizeof(paNative[0]);
15921
15922# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15923 cs_insn *pInstr;
15924 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
15925 (uintptr_t)pNativeCur, 1, &pInstr);
15926 if (cInstrs > 0)
15927 {
15928 Assert(cInstrs == 1);
15929 const char *pszAnnotation = NULL;
15930# if defined(RT_ARCH_ARM64)
15931 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
15932 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
15933 {
15934                     /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
15935 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
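                          /* x28 holds pVCpu and x27 points at VMCPU::cpum.GstCtx, so '[x27, #imm]' and
                             '[x28, #imm]' operands can be mapped back to structure member names. */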
15936 char *psz = strchr(pInstr->op_str, '[');
15937 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
15938 {
15939                         uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
15940 int32_t off = -1;
15941 psz += 4;
15942 if (*psz == ']')
15943 off = 0;
15944 else if (*psz == ',')
15945 {
15946 psz = RTStrStripL(psz + 1);
15947 if (*psz == '#')
15948 off = RTStrToInt32(&psz[1]);
15949 /** @todo deal with index registers and LSL as well... */
15950 }
15951 if (off >= 0)
15952 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
15953 }
15954 }
15955# endif
15956
15957 size_t const cchOp = strlen(pInstr->op_str);
15958# if defined(RT_ARCH_AMD64)
15959 if (pszAnnotation)
15960 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
15961 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
15962 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15963 else
15964 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
15965 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
15966
15967# else
15968 if (pszAnnotation)
15969 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
15970 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
15971 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15972 else
15973 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
15974 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
15975# endif
15976 offNative += pInstr->size / sizeof(*pNativeCur);
15977 cs_free(pInstr, cInstrs);
15978 }
15979 else
15980 {
15981# if defined(RT_ARCH_AMD64)
15982 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
15983                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
15984# else
15985 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
15986# endif
15987 offNative++;
15988 }
15989# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15990 }
15991 }
15992 else
15993#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
15994 {
15995 /*
15996 * No debug info, just disassemble the x86 code and then the native code.
15997 *
15998 * First the guest code:
15999 */
16000 for (unsigned i = 0; i < pTb->cRanges; i++)
16001 {
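                  /* idxPhysPage 0 is the page containing GCPhysPc; higher indexes are looked up (minus one) in aGCPhysPages. */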
16002 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
16003 + (pTb->aRanges[i].idxPhysPage == 0
16004 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
16005 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
16006 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
16007 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
16008 unsigned off = pTb->aRanges[i].offOpcodes;
16009 /** @todo this ain't working when crossing pages! */
16010 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
16011 while (off < cbOpcodes)
16012 {
16013 uint32_t cbInstr = 1;
16014 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
16015 &pTb->pabOpcodes[off], cbOpcodes - off,
16016 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
16017 if (RT_SUCCESS(rc))
16018 {
16019 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
16020 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
16021 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16022 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16023 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
16024 GCPhysPc += cbInstr;
16025 off += cbInstr;
16026 }
16027 else
16028 {
16029 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
16030 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
16031 break;
16032 }
16033 }
16034 }
16035
16036 /*
16037 * Then the native code:
16038 */
16039 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
16040 while (offNative < cNative)
16041 {
16042 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
16043# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
16044 uint32_t cbInstr = sizeof(paNative[0]);
16045 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
16046 if (RT_SUCCESS(rc))
16047 {
16048# if defined(RT_ARCH_AMD64)
16049 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
16050 {
16051 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
16052 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
16053 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
16054 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
16055 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
16056 uInfo & 0x8000 ? "recompiled" : "todo");
16057 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
16058 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
16059 else
16060 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
16061 }
16062 else
16063# endif
16064 {
16065# ifdef RT_ARCH_AMD64
16066 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
16067 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
16068 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16069 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16070# elif defined(RT_ARCH_ARM64)
16071 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
16072 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16073 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16074# else
16075# error "Port me"
16076# endif
16077 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
16078 }
16079 }
16080 else
16081 {
16082# if defined(RT_ARCH_AMD64)
16083 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
16084 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
16085# else
16086 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
16087# endif
16088 cbInstr = sizeof(paNative[0]);
16089 }
16090 offNative += cbInstr / sizeof(paNative[0]);
16091
16092# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16093 cs_insn *pInstr;
16094 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
16095 (uintptr_t)pNativeCur, 1, &pInstr);
16096 if (cInstrs > 0)
16097 {
16098 Assert(cInstrs == 1);
16099# if defined(RT_ARCH_AMD64)
16100 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
16101 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
16102# else
16103 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
16104 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
16105# endif
16106 offNative += pInstr->size / sizeof(*pNativeCur);
16107 cs_free(pInstr, cInstrs);
16108 }
16109 else
16110 {
16111# if defined(RT_ARCH_AMD64)
16112 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
16113                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
16114# else
16115 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
16116# endif
16117 offNative++;
16118 }
16119# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16120 }
16121 }
16122
16123#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
16124 /* Cleanup. */
16125 cs_close(&hDisasm);
16126#endif
16127}
16128
16129
16130/**
16131 * Recompiles the given threaded TB into a native one.
16132 *
16133 * In case of failure the translation block will be returned as-is.
16134 *
16135 * @returns pTb.
16136 * @param pVCpu The cross context virtual CPU structure of the calling
16137 * thread.
16138  * @param   pTb     The threaded translation block to recompile to native.
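 *
 * Typical usage (a sketch; the real call site lives elsewhere in IEM):
 * @code
 *      pTb = iemNativeRecompile(pVCpu, pTb); // pTb is returned unchanged if recompilation failed
 * @endcode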
16139 */
16140DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
16141{
16142 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
16143
16144 /*
16145      * The first time thru, we allocate the recompiler state; the other times
16146      * we just need to reset it before using it again.
16147 */
16148 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
16149 if (RT_LIKELY(pReNative))
16150 iemNativeReInit(pReNative, pTb);
16151 else
16152 {
16153 pReNative = iemNativeInit(pVCpu, pTb);
16154 AssertReturn(pReNative, pTb);
16155 }
16156
16157#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16158 /*
16159 * First do liveness analysis. This is done backwards.
16160 */
16161 {
16162 uint32_t idxCall = pTb->Thrd.cCalls;
16163 if (idxCall <= pReNative->cLivenessEntriesAlloc)
16164 { /* likely */ }
16165 else
16166 {
16167 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
16168 while (idxCall > cAlloc)
16169 cAlloc *= 2;
16170 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
16171 AssertReturn(pvNew, pTb);
16172 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
16173 pReNative->cLivenessEntriesAlloc = cAlloc;
16174 }
16175 AssertReturn(idxCall > 0, pTb);
16176 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
16177
16178 /* The initial (final) entry. */
16179 idxCall--;
16180 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
16181
16182 /* Loop backwards thru the calls and fill in the other entries. */
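              /* Each pfnLiveness invocation propagates the state from entry idxCall backwards into entry idxCall - 1. */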
16183 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
16184 while (idxCall > 0)
16185 {
16186 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
16187 if (pfnLiveness)
16188 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
16189 else
16190 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
16191 pCallEntry--;
16192 idxCall--;
16193 }
16194
16195# ifdef VBOX_WITH_STATISTICS
16196     /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
16197        to 'clobbered' rather than 'input'.  */
16198 /** @todo */
16199# endif
16200 }
16201#endif
16202
16203 /*
16204 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
16205      * so that we can abort if an error happens.
16206 */
16207 uint32_t cCallsLeft = pTb->Thrd.cCalls;
16208#ifdef LOG_ENABLED
16209 uint32_t const cCallsOrg = cCallsLeft;
16210#endif
16211 uint32_t off = 0;
16212 int rc = VINF_SUCCESS;
16213 IEMNATIVE_TRY_SETJMP(pReNative, rc)
16214 {
16215 /*
16216 * Emit prolog code (fixed).
16217 */
16218 off = iemNativeEmitProlog(pReNative, off);
16219
16220 /*
16221 * Convert the calls to native code.
16222 */
16223#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16224 int32_t iGstInstr = -1;
16225#endif
16226#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
16227 uint32_t cThreadedCalls = 0;
16228 uint32_t cRecompiledCalls = 0;
16229#endif
16230#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16231 uint32_t idxCurCall = 0;
16232#endif
16233 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
16234 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
16235 while (cCallsLeft-- > 0)
16236 {
16237 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
16238#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16239 pReNative->idxCurCall = idxCurCall;
16240#endif
16241
16242 /*
16243 * Debug info, assembly markup and statistics.
16244 */
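                  /* Keep fExec in sync with mode changes flagged by the CheckMode builtin call. */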
16245#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
16246 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
16247 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
16248#endif
16249#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16250 iemNativeDbgInfoAddNativeOffset(pReNative, off);
16251 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
16252 {
16253 if (iGstInstr < (int32_t)pTb->cInstructions)
16254 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
16255 else
16256 Assert(iGstInstr == pTb->cInstructions);
16257 iGstInstr = pCallEntry->idxInstr;
16258 }
16259 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
16260#endif
16261#if defined(VBOX_STRICT)
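                  /* Emit a marker NOP encoding the call index (bit 15 = recompiled) and the threaded function
                     number; iemNativeDisassembleTb decodes this when annotating the native code. */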
16262 off = iemNativeEmitMarker(pReNative, off,
16263 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
16264#endif
16265#if defined(VBOX_STRICT)
16266 iemNativeRegAssertSanity(pReNative);
16267#endif
16268#ifdef VBOX_WITH_STATISTICS
16269 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
16270#endif
16271
16272 /*
16273 * Actual work.
16274 */
16275 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
16276 pfnRecom ? "(recompiled)" : "(todo)"));
16277 if (pfnRecom) /** @todo stats on this. */
16278 {
16279 off = pfnRecom(pReNative, off, pCallEntry);
16280 STAM_REL_STATS({cRecompiledCalls++;});
16281 }
16282 else
16283 {
16284 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
16285 STAM_REL_STATS({cThreadedCalls++;});
16286 }
16287 Assert(off <= pReNative->cInstrBufAlloc);
16288 Assert(pReNative->cCondDepth == 0);
16289
16290#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
16291 if (LogIs2Enabled())
16292 {
16293 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
16294# ifndef IEMLIVENESS_EXTENDED_LAYOUT
16295 static const char s_achState[] = "CUXI";
16296# else
16297 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
16298# endif
16299
16300 char szGpr[17];
16301 for (unsigned i = 0; i < 16; i++)
16302 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
16303 szGpr[16] = '\0';
16304
16305 char szSegBase[X86_SREG_COUNT + 1];
16306 char szSegLimit[X86_SREG_COUNT + 1];
16307 char szSegAttrib[X86_SREG_COUNT + 1];
16308 char szSegSel[X86_SREG_COUNT + 1];
16309 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
16310 {
16311 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
16312 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
16313 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
16314 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
16315 }
16316 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
16317 = szSegSel[X86_SREG_COUNT] = '\0';
16318
16319 char szEFlags[8];
16320 for (unsigned i = 0; i < 7; i++)
16321 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
16322 szEFlags[7] = '\0';
16323
16324 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
16325 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
16326 }
16327#endif
16328
16329 /*
16330 * Advance.
16331 */
16332 pCallEntry++;
16333#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16334 idxCurCall++;
16335#endif
16336 }
16337
16338 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
16339 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
16340 if (!cThreadedCalls)
16341 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
16342
16343 /*
16344 * Emit the epilog code.
16345 */
16346 uint32_t idxReturnLabel;
16347 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
16348
16349 /*
16350 * Generate special jump labels.
16351 */
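              /* Tail code is only generated for label types that were actually requested (tracked in bmLabelTypes). */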
16352 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
16353 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
16354 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
16355 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
16356 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
16357 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
16358 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
16359 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
16360 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
16361 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
16362 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
16363 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
16364 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
16365 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
16366 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
16367 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
16368 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
16369 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
16370 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
16371 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
16372 }
16373 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
16374 {
16375 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
16376 return pTb;
16377 }
16378 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
16379 Assert(off <= pReNative->cInstrBufAlloc);
16380
16381 /*
16382      * Make sure all labels have been defined.
16383 */
16384 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
16385#ifdef VBOX_STRICT
16386 uint32_t const cLabels = pReNative->cLabels;
16387 for (uint32_t i = 0; i < cLabels; i++)
16388 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
16389#endif
16390
16391 /*
16392 * Allocate executable memory, copy over the code we've generated.
16393 */
16394 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
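          /* Flush any pending delayed TB frees before allocating executable memory for the new code. */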
16395 if (pTbAllocator->pDelayedFreeHead)
16396 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
16397
16398 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
16399 AssertReturn(paFinalInstrBuf, pTb);
16400 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
16401
16402 /*
16403 * Apply fixups.
16404 */
16405 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
16406 uint32_t const cFixups = pReNative->cFixups;
16407 for (uint32_t i = 0; i < cFixups; i++)
16408 {
16409 Assert(paFixups[i].off < off);
16410 Assert(paFixups[i].idxLabel < cLabels);
16411 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
16412 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
16413 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
16414 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
16415 switch (paFixups[i].enmType)
16416 {
16417#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
16418 case kIemNativeFixupType_Rel32:
16419 Assert(paFixups[i].off + 4 <= off);
16420 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16421 continue;
16422
16423#elif defined(RT_ARCH_ARM64)
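                  /* ARM64 B/BL style: signed 26-bit displacement (in instructions) in bits [25:0]. */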
16424 case kIemNativeFixupType_RelImm26At0:
16425 {
16426 Assert(paFixups[i].off < off);
16427 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16428 Assert(offDisp >= -262144 && offDisp < 262144);
16429 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
16430 continue;
16431 }
16432
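                  /* ARM64 B.cond/CBZ/CBNZ style: signed 19-bit displacement in bits [23:5]. */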
16433 case kIemNativeFixupType_RelImm19At5:
16434 {
16435 Assert(paFixups[i].off < off);
16436 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16437 Assert(offDisp >= -262144 && offDisp < 262144);
16438 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
16439 continue;
16440 }
16441
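                  /* ARM64 TBZ/TBNZ style: signed 14-bit displacement in bits [18:5]. */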
16442 case kIemNativeFixupType_RelImm14At5:
16443 {
16444 Assert(paFixups[i].off < off);
16445 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16446 Assert(offDisp >= -8192 && offDisp < 8192);
16447 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
16448 continue;
16449 }
16450
16451#endif
16452 case kIemNativeFixupType_Invalid:
16453 case kIemNativeFixupType_End:
16454 break;
16455 }
16456 AssertFailed();
16457 }
16458
16459 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
16460 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
16461
16462 /*
16463 * Convert the translation block.
16464 */
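          /* The threaded call table isn't needed once the TB carries native code. */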
16465 RTMemFree(pTb->Thrd.paCalls);
16466 pTb->Native.paInstructions = paFinalInstrBuf;
16467 pTb->Native.cInstructions = off;
16468 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
16469#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16470     pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
16471 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
16472#endif
16473
16474 Assert(pTbAllocator->cThreadedTbs > 0);
16475 pTbAllocator->cThreadedTbs -= 1;
16476 pTbAllocator->cNativeTbs += 1;
16477 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
16478
16479#ifdef LOG_ENABLED
16480 /*
16481 * Disassemble to the log if enabled.
16482 */
16483 if (LogIs3Enabled())
16484 {
16485 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
16486 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
16487# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
16488 RTLogFlush(NULL);
16489# endif
16490 }
16491#endif
16492 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
16493
16494 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
16495 return pTb;
16496}
16497