VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103761

Last change on this file since 103761 was 103761, checked in by vboxsync, 9 months ago

VMM/IEM: Implement native emitter for IEM_MC_FETCH_XREG_U64(), bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 734.0 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103761 2024-03-11 12:07:32Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
136static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
137 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
138# endif
139static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
140#endif
141#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
142static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
143static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
144#endif
145DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
146DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
147 IEMNATIVEGSTREG enmGstReg, uint32_t off);
148DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
149
150
151/*********************************************************************************************************************************
152* Executable Memory Allocator *
153*********************************************************************************************************************************/
154/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155 * Use an alternative chunk sub-allocator that does not store internal data
156 * in the chunk.
157 *
158 * Using RTHeapSimple is not practical on newer darwin systems where
159 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
160 * memory. We would have to change the protection of the whole chunk for
161 * every call to RTHeapSimple, which would be rather expensive.
162 *
163 * This alternative implementation lets us restrict page protection modifications
164 * to the pages backing the executable memory we just allocated.
165 */
166#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
167/** The chunk sub-allocation unit size in bytes. */
168#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
169/** The chunk sub-allocation unit size as a shift factor. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
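/* Example: with the 128 byte unit size above, a 300 byte request is rounded up
 * to (300 + 128 - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT = 3 units, i.e.
 * 384 bytes, before the allocation bitmap is scanned (see
 * iemExecMemAllocatorAllocInChunk). */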
171
172#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
173# ifdef IEMNATIVE_USE_GDB_JIT
174# define IEMNATIVE_USE_GDB_JIT_ET_DYN
175
176/** GDB JIT: Code entry. */
177typedef struct GDBJITCODEENTRY
178{
179 struct GDBJITCODEENTRY *pNext;
180 struct GDBJITCODEENTRY *pPrev;
181 uint8_t *pbSymFile;
182 uint64_t cbSymFile;
183} GDBJITCODEENTRY;
184
185/** GDB JIT: Actions. */
186typedef enum GDBJITACTIONS : uint32_t
187{
188 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
189} GDBJITACTIONS;
190
191/** GDB JIT: Descriptor. */
192typedef struct GDBJITDESCRIPTOR
193{
194 uint32_t uVersion;
195 GDBJITACTIONS enmAction;
196 GDBJITCODEENTRY *pRelevant;
197 GDBJITCODEENTRY *pHead;
198 /** Our addition: */
199 GDBJITCODEENTRY *pTail;
200} GDBJITDESCRIPTOR;
201
202/** GDB JIT: Our simple symbol file data. */
203typedef struct GDBJITSYMFILE
204{
205 Elf64_Ehdr EHdr;
206# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Shdr aShdrs[5];
208# else
209 Elf64_Shdr aShdrs[7];
210 Elf64_Phdr aPhdrs[2];
211# endif
212 /** The dwarf ehframe data for the chunk. */
213 uint8_t abEhFrame[512];
214 char szzStrTab[128];
215 Elf64_Sym aSymbols[3];
216# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
217 Elf64_Sym aDynSyms[2];
218 Elf64_Dyn aDyn[6];
219# endif
220} GDBJITSYMFILE;
221
222extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
223extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
224
225/** Init once for g_IemNativeGdbJitLock. */
226static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
227/** Init once for the critical section. */
228static RTCRITSECT g_IemNativeGdbJitLock;
229
230/** GDB reads the info here. */
231GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
232
233/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
234DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
235{
236 ASMNopPause();
237}
238
239/** @callback_method_impl{FNRTONCE} */
240static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
241{
242 RT_NOREF(pvUser);
243 return RTCritSectInit(&g_IemNativeGdbJitLock);
244}
245
246
247# endif /* IEMNATIVE_USE_GDB_JIT */
248
249/**
250 * Per-chunk unwind info for non-windows hosts.
251 */
252typedef struct IEMEXECMEMCHUNKEHFRAME
253{
254# ifdef IEMNATIVE_USE_LIBUNWIND
255 /** The offset of the FDA into abEhFrame. */
256 uintptr_t offFda;
257# else
258 /** 'struct object' storage area. */
259 uint8_t abObject[1024];
260# endif
261# ifdef IEMNATIVE_USE_GDB_JIT
262# if 0
263 /** The GDB JIT 'symbol file' data. */
264 GDBJITSYMFILE GdbJitSymFile;
265# endif
266 /** The GDB JIT list entry. */
267 GDBJITCODEENTRY GdbJitEntry;
268# endif
269 /** The dwarf ehframe data for the chunk. */
270 uint8_t abEhFrame[512];
271} IEMEXECMEMCHUNKEHFRAME;
272/** Pointer to per-chunk unwind info for non-windows hosts. */
273typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
274#endif
275
276
277/**
278 * A chunk of executable memory.
279 */
280typedef struct IEMEXECMEMCHUNK
281{
282#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
283 /** Number of free items in this chunk. */
284 uint32_t cFreeUnits;
285 /** Hint where to start searching for free space in the allocation bitmap. */
286 uint32_t idxFreeHint;
287#else
288 /** The heap handle. */
289 RTHEAPSIMPLE hHeap;
290#endif
291 /** Pointer to the chunk. */
292 void *pvChunk;
293#ifdef IN_RING3
294 /**
295 * Pointer to the unwind information.
296 *
297 * This is used during C++ throw and longjmp (windows and probably most other
298 * platforms). Some debuggers (windbg) make use of it as well.
299 *
300 * Windows: This is allocated from hHeap on windows because (at least for
301 * AMD64) the UNWIND_INFO structure address in the
302 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
303 *
304 * Others: Allocated from the regular heap to avoid unnecessary executable data
305 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
306 void *pvUnwindInfo;
307#elif defined(IN_RING0)
308 /** Allocation handle. */
309 RTR0MEMOBJ hMemObj;
310#endif
311} IEMEXECMEMCHUNK;
312/** Pointer to a memory chunk. */
313typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
314
315
316/**
317 * Executable memory allocator for the native recompiler.
318 */
319typedef struct IEMEXECMEMALLOCATOR
320{
321 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
322 uint32_t uMagic;
323
324 /** The chunk size. */
325 uint32_t cbChunk;
326 /** The maximum number of chunks. */
327 uint32_t cMaxChunks;
328 /** The current number of chunks. */
329 uint32_t cChunks;
330 /** Hint where to start looking for available memory. */
331 uint32_t idxChunkHint;
332 /** Statistics: Current number of allocations. */
333 uint32_t cAllocations;
334
335 /** The total amount of memory available. */
336 uint64_t cbTotal;
337 /** Total amount of free memory. */
338 uint64_t cbFree;
339 /** Total amount of memory allocated. */
340 uint64_t cbAllocated;
341
342#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
343 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
344 *
345 * Since the chunk size is a power of two and the minimum chunk size is a lot
346 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
347 * require a whole number of uint64_t elements in the allocation bitmap. So,
348 * for the sake of simplicity/laziness, they are allocated as one continuous
349 * chunk. */
350 uint64_t *pbmAlloc;
351 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
352 uint32_t cUnitsPerChunk;
353 /** Number of bitmap elements per chunk (for quickly locating the bitmap
354 * portion corresponding to a chunk). */
355 uint32_t cBitmapElementsPerChunk;
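    /* Example: a 64 MB chunk with 128 byte units gives 524288 units per chunk
     * and 524288 / 64 = 8192 bitmap elements per chunk. */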
356#else
357 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
358 * @{ */
359 /** The size of the heap internal block header. This is used to adjust the
360 * requested memory size to make sure there is exactly enough room for a header at
361 * the end of the blocks we allocate before the next 64 byte alignment line. */
362 uint32_t cbHeapBlockHdr;
363 /** The size of the initial heap allocation required to make sure the first
364 * allocation is correctly aligned. */
365 uint32_t cbHeapAlignTweak;
366 /** The alignment tweak allocation address. */
367 void *pvAlignTweak;
368 /** @} */
369#endif
370
371#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
372 /** Pointer to the array of unwind info running parallel to aChunks (same
373 * allocation as this structure, located after the bitmaps).
374 * (For Windows, the structures must reside within 32-bit RVA distance of the
375 * actual chunk, so they are allocated off the chunk.) */
376 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
377#endif
378
379 /** The allocation chunks. */
380 RT_FLEXIBLE_ARRAY_EXTENSION
381 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
382} IEMEXECMEMALLOCATOR;
383/** Pointer to an executable memory allocator. */
384typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
385
386/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
387#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
388
389
390static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
391
392
393/**
394 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
395 * the heap statistics.
396 */
397static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
398 uint32_t cbReq, uint32_t idxChunk)
399{
400 pExecMemAllocator->cAllocations += 1;
401 pExecMemAllocator->cbAllocated += cbReq;
402#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
403 pExecMemAllocator->cbFree -= cbReq;
404#else
405 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
406#endif
407 pExecMemAllocator->idxChunkHint = idxChunk;
408
409#ifdef RT_OS_DARWIN
410 /*
411 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
412 * on darwin. So, we mark the pages returned as read+write after alloc and
413 * expect the caller to call iemExecMemAllocatorReadyForUse when done
414 * writing to the allocation.
415 *
416 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
417 * for details.
418 */
419 /** @todo detect if this is necessary... it wasn't required on 10.15 or
420 * whatever older version it was. */
421 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
422 AssertRC(rc);
423#endif
424
425 return pvRet;
426}
427
428
429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
430static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
431 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
432{
433 /*
434 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
435 */
436 Assert(!(cToScan & 63));
437 Assert(!(idxFirst & 63));
438 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
439 pbmAlloc += idxFirst / 64;
440
441 /*
442 * Scan the bitmap for cReqUnits consecutive clear bits
443 */
444 /** @todo This can probably be done more efficiently for non-x86 systems. */
445 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
446 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
447 {
448 uint32_t idxAddBit = 1;
449 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
450 idxAddBit++;
451 if (idxAddBit >= cReqUnits)
452 {
453 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
454
455 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
456 pChunk->cFreeUnits -= cReqUnits;
457 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
458
459 void * const pvRet = (uint8_t *)pChunk->pvChunk
460 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
461
462 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
463 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
464 }
465
466 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
467 }
468 return NULL;
469}
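/* Worked example for the scan above: looking for cReqUnits=3 in a bitmap whose
 * low bits are ...00011100 (units 2-4 taken), ASMBitFirstClear returns 0, the
 * inner loop only finds units 0-1 clear (idxAddBit stops at 2), so the scan
 * resumes at ASMBitNextClear(pbmAlloc, cToScan, 1), lands on unit 5, finds
 * units 5-7 clear and marks that range as allocated. */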
470#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
471
472
473static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
474{
475#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
476 /*
477 * Figure out how much to allocate.
478 */
479 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
480 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
481 {
482 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
483 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
484 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
485 {
486 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
487 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
488 if (pvRet)
489 return pvRet;
490 }
491 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
492 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
493 cReqUnits, idxChunk);
494 }
495#else
496 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
497 if (pvRet)
498 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
499#endif
500 return NULL;
501
502}
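/* Note on the lookup order above: with e.g. idxFreeHint = 100 the first scan
 * starts at unit 64 (the hint rounded down to a bitmap element boundary) and
 * runs to the end of the chunk; only if that fails is the area from unit 0 up
 * to roughly the hint rescanned. */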
503
504
505/**
506 * Allocates @a cbReq bytes of executable memory.
507 *
508 * @returns Pointer to the memory, NULL if out of memory or other problem
509 * encountered.
510 * @param pVCpu The cross context virtual CPU structure of the calling
511 * thread.
512 * @param cbReq How many bytes are required.
513 */
514static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
515{
516 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
517 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
518 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
519
520
521 for (unsigned iIteration = 0;; iIteration++)
522 {
523 /*
524 * Adjust the request size so it'll fit the allocator alignment/whatnot.
525 *
526 * For the RTHeapSimple allocator this means following the logic described
527 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
528 * existing chunks if we think we've got sufficient free memory around.
529 *
530 * For the alternative one we just align it up to a whole unit size.
531 */
532#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
533 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
534#else
535 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
536#endif
537 if (cbReq <= pExecMemAllocator->cbFree)
538 {
539 uint32_t const cChunks = pExecMemAllocator->cChunks;
540 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
541 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
548 {
549 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
550 if (pvRet)
551 return pvRet;
552 }
553 }
554
555 /*
556 * Can we grow it with another chunk?
557 */
558 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
559 {
560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
561 AssertLogRelRCReturn(rc, NULL);
562
563 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
564 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
565 if (pvRet)
566 return pvRet;
567 AssertFailed();
568 }
569
570 /*
571 * Try prune native TBs once.
572 */
573 if (iIteration == 0)
574 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
575 else
576 {
577 /** @todo stats... */
578 return NULL;
579 }
580 }
581
582}
583
584
585/** This is a hook that we may need later for changing memory protection back
586 * to readonly+exec */
587static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
588{
589#ifdef RT_OS_DARWIN
590 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
591 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
592 AssertRC(rc); RT_NOREF(pVCpu);
593
594 /*
595 * Flush the instruction cache:
596 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
597 */
598 /* sys_dcache_flush(pv, cb); - not necessary */
599 sys_icache_invalidate(pv, cb);
600#else
601 RT_NOREF(pVCpu, pv, cb);
602#endif
603}
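/* Rough usage sketch (illustrative only; the real callers are the native TB
 * recompilation paths elsewhere in IEM):
 *      uint8_t *pbCode = (uint8_t *)iemExecMemAllocatorAlloc(pVCpu, cbNeeded);
 *      if (pbCode)
 *      {
 *          memcpy(pbCode, pvEmittedCode, cbNeeded);                  // write while the pages are RW
 *          iemExecMemAllocatorReadyForUse(pVCpu, pbCode, cbNeeded);  // flip to RX + icache flush on darwin
 *      }
 */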
604
605
606/**
607 * Frees executable memory.
608 */
609void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
610{
611 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
612 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
613 Assert(pv);
614#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
615 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
616#else
617 Assert(!((uintptr_t)pv & 63));
618#endif
619
620 /* Align the size as we did when allocating the block. */
621#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
622 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
623#else
624 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
625#endif
626
627 /* Free it / assert sanity. */
628#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
629 uint32_t const cChunks = pExecMemAllocator->cChunks;
630 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
631 bool fFound = false;
632 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
633 {
634 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
635 fFound = offChunk < cbChunk;
636 if (fFound)
637 {
638#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
639 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
640 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
641
642 /* Check that it's valid and free it. */
643 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
644 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
645 for (uint32_t i = 1; i < cReqUnits; i++)
646 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
647 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
648
649 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
650 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
651
652 /* Update the stats. */
653 pExecMemAllocator->cbAllocated -= cb;
654 pExecMemAllocator->cbFree += cb;
655 pExecMemAllocator->cAllocations -= 1;
656 return;
657#else
658 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
659 break;
660#endif
661 }
662 }
663# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
664 AssertFailed();
665# else
666 Assert(fFound);
667# endif
668#endif
669
670#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
671 /* Update stats while cb is freshly calculated. */
672 pExecMemAllocator->cbAllocated -= cb;
673 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
674 pExecMemAllocator->cAllocations -= 1;
675
676 /* Free it. */
677 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
678#endif
679}
680
681
682
683#ifdef IN_RING3
684# ifdef RT_OS_WINDOWS
685
686/**
687 * Initializes the unwind info structures for windows hosts.
688 */
689static int
690iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
691 void *pvChunk, uint32_t idxChunk)
692{
693 RT_NOREF(pVCpu);
694
695 /*
696 * The AMD64 unwind opcodes.
697 *
698 * This is a program that starts with RSP after a RET instruction that
699 * ends up in recompiled code, and the operations we describe here will
700 * restore all non-volatile registers and bring RSP back to where our
701 * RET address is. This means it's reverse order from what happens in
702 * the prologue.
703 *
704 * Note! Using a frame register approach here both because we have one
705 * and mainly because the UWOP_ALLOC_LARGE argument values
706 * would be a pain to write initializers for. On the positive
707 * side, we're impervious to changes in the stack variable
708 * area and can deal with dynamic stack allocations if necessary.
709 */
710 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
711 {
712 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
713 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
714 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
715 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
716 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
717 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
718 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
719 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
720 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
721 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
722 };
723 union
724 {
725 IMAGE_UNWIND_INFO Info;
726 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
727 } s_UnwindInfo =
728 {
729 {
730 /* .Version = */ 1,
731 /* .Flags = */ 0,
732 /* .SizeOfProlog = */ 16, /* whatever */
733 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
734 /* .FrameRegister = */ X86_GREG_xBP,
735 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
736 }
737 };
738 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
739 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
740
741 /*
742 * Calc how much space we need and allocate it off the exec heap.
743 */
744 unsigned const cFunctionEntries = 1;
745 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
746 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
747# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
749 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
750 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
751# else
752 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
753 - pExecMemAllocator->cbHeapBlockHdr;
754 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbNeededAligned,
755 32 /*cbAlignment*/);
756# endif
757 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
758 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
759
760 /*
761 * Initialize the structures.
762 */
763 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
764
765 paFunctions[0].BeginAddress = 0;
766 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
767 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
768
769 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
770 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
771
772 /*
773 * Register it.
774 */
775 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
776 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
777
778 return VINF_SUCCESS;
779}
780
781
782# else /* !RT_OS_WINDOWS */
783
784/**
785 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
786 */
787DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
788{
789 if (iValue >= 64)
790 {
791 Assert(iValue < 0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
794 }
795 else if (iValue >= 0)
796 *Ptr.pb++ = (uint8_t)iValue;
797 else if (iValue > -64)
798 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
799 else
800 {
801 Assert(iValue > -0x2000);
802 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
803 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
804 }
805 return Ptr;
806}
807
808
809/**
810 * Emits an ULEB128 encoded value (up to 64-bit wide).
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
813{
814 while (uValue >= 0x80)
815 {
816 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
817 uValue >>= 7;
818 }
819 *Ptr.pb++ = (uint8_t)uValue;
820 return Ptr;
821}
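/* Encoding examples: iemDwarfPutUleb128(Ptr, 300) emits the bytes 0xac 0x02
 * (low 7 bits 0x2c with the continuation bit set, then 0x02), while
 * iemDwarfPutLeb128(Ptr, -8) emits the single byte 0x78 (0x38 | 0x40). */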
822
823
824/**
825 * Emits a CFA rule as register @a uReg + offset @a off.
826 */
827DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
828{
829 *Ptr.pb++ = DW_CFA_def_cfa;
830 Ptr = iemDwarfPutUleb128(Ptr, uReg);
831 Ptr = iemDwarfPutUleb128(Ptr, off);
832 return Ptr;
833}
834
835
836/**
837 * Emits a register (@a uReg) save location:
838 * CFA + @a off * data_alignment_factor
839 */
840DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
841{
842 if (uReg < 0x40)
843 *Ptr.pb++ = DW_CFA_offset | uReg;
844 else
845 {
846 *Ptr.pb++ = DW_CFA_offset_extended;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 }
849 Ptr = iemDwarfPutUleb128(Ptr, off);
850 return Ptr;
851}
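/* Example: with the data alignment factor of -8 used in the CIE below,
 * iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) records RBP as saved at
 * CFA + 2 * -8 = CFA - 16. */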
852
853
854# if 0 /* unused */
855/**
856 * Emits a register (@a uReg) save location, using signed offset:
857 * CFA + @a offSigned * data_alignment_factor
858 */
859DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
860{
861 *Ptr.pb++ = DW_CFA_offset_extended_sf;
862 Ptr = iemDwarfPutUleb128(Ptr, uReg);
863 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
864 return Ptr;
865}
866# endif
867
868
869/**
870 * Initializes the unwind info section for non-windows hosts.
871 */
872static int
873iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
874 void *pvChunk, uint32_t idxChunk)
875{
876 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
877 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
878
879 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
880
881 /*
882 * Generate the CIE first.
883 */
884# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
885 uint8_t const iDwarfVer = 3;
886# else
887 uint8_t const iDwarfVer = 4;
888# endif
889 RTPTRUNION const PtrCie = Ptr;
890 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
891 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
892 *Ptr.pb++ = iDwarfVer; /* DWARF version */
893 *Ptr.pb++ = 0; /* Augmentation. */
894 if (iDwarfVer >= 4)
895 {
896 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
897 *Ptr.pb++ = 0; /* Segment selector size. */
898 }
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
901# else
902 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
903# endif
904 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
905# ifdef RT_ARCH_AMD64
906 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
907# elif defined(RT_ARCH_ARM64)
908 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
909# else
910# error "port me"
911# endif
912 /* Initial instructions: */
913# ifdef RT_ARCH_AMD64
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
922# elif defined(RT_ARCH_ARM64)
923# if 1
924 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
925# else
926 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
927# endif
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
936 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
937 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
938 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
939 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
940 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
941 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
942# else
943# error "port me"
944# endif
945 while ((Ptr.u - PtrCie.u) & 3)
946 *Ptr.pb++ = DW_CFA_nop;
947 /* Finalize the CIE size. */
948 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
949
950 /*
951 * Generate an FDE for the whole chunk area.
952 */
953# ifdef IEMNATIVE_USE_LIBUNWIND
954 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
955# endif
956 RTPTRUNION const PtrFde = Ptr;
957 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
958 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
959 Ptr.pu32++;
960 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
961 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
962# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glib. */
963 *Ptr.pb++ = DW_CFA_nop;
964# endif
965 while ((Ptr.u - PtrFde.u) & 3)
966 *Ptr.pb++ = DW_CFA_nop;
967 /* Finalize the FDE size. */
968 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
969
970 /* Terminator entry. */
971 *Ptr.pu32++ = 0;
972 *Ptr.pu32++ = 0; /* just to be sure... */
973 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
974
975 /*
976 * Register it.
977 */
978# ifdef IEMNATIVE_USE_LIBUNWIND
979 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
980# else
981 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
982 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
983# endif
984
985# ifdef IEMNATIVE_USE_GDB_JIT
986 /*
987 * Now for telling GDB about this (experimental).
988 *
989 * This seems to work best with ET_DYN.
990 */
991 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
992# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
993 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
995# else
996 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
997 - pExecMemAllocator->cbHeapBlockHdr;
998 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbNeededAligned, 32 /*cbAlignment*/);
999# endif
1000 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1001 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1002
1003 RT_ZERO(*pSymFile);
1004
1005 /*
1006 * The ELF header:
1007 */
1008 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1009 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1010 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1011 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1012 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1013 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1014 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1015 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1016# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1017 pSymFile->EHdr.e_type = ET_DYN;
1018# else
1019 pSymFile->EHdr.e_type = ET_REL;
1020# endif
1021# ifdef RT_ARCH_AMD64
1022 pSymFile->EHdr.e_machine = EM_AMD64;
1023# elif defined(RT_ARCH_ARM64)
1024 pSymFile->EHdr.e_machine = EM_AARCH64;
1025# else
1026# error "port me"
1027# endif
1028 pSymFile->EHdr.e_version = 1; /*?*/
1029 pSymFile->EHdr.e_entry = 0;
1030# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1031 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1032# else
1033 pSymFile->EHdr.e_phoff = 0;
1034# endif
1035 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1036 pSymFile->EHdr.e_flags = 0;
1037 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1038# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1039 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1040 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1041# else
1042 pSymFile->EHdr.e_phentsize = 0;
1043 pSymFile->EHdr.e_phnum = 0;
1044# endif
1045 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1046 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1047 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1048
1049 uint32_t offStrTab = 0;
1050#define APPEND_STR(a_szStr) do { \
1051 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1052 offStrTab += sizeof(a_szStr); \
1053 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1054 } while (0)
1055#define APPEND_STR_FMT(a_szStr, ...) do { \
1056 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1057 offStrTab++; \
1058 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1059 } while (0)
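/* The two macros above build szzStrTab as a packed sequence of terminated
 * names ("\0.eh_frame\0.shstrtab\0.symtab\0..."), with each sh_name, st_name
 * and DT_SONAME value below holding the offset of its name in that table. */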
1060
1061 /*
1062 * Section headers.
1063 */
1064 /* Section header #0: NULL */
1065 unsigned i = 0;
1066 APPEND_STR("");
1067 RT_ZERO(pSymFile->aShdrs[i]);
1068 i++;
1069
1070 /* Section header: .eh_frame */
1071 pSymFile->aShdrs[i].sh_name = offStrTab;
1072 APPEND_STR(".eh_frame");
1073 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1074 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1075# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1076 pSymFile->aShdrs[i].sh_offset
1077 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1078# else
1079 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1080 pSymFile->aShdrs[i].sh_offset = 0;
1081# endif
1082
1083 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1084 pSymFile->aShdrs[i].sh_link = 0;
1085 pSymFile->aShdrs[i].sh_info = 0;
1086 pSymFile->aShdrs[i].sh_addralign = 1;
1087 pSymFile->aShdrs[i].sh_entsize = 0;
1088 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1089 i++;
1090
1091 /* Section header: .shstrtab */
1092 unsigned const iShStrTab = i;
1093 pSymFile->EHdr.e_shstrndx = iShStrTab;
1094 pSymFile->aShdrs[i].sh_name = offStrTab;
1095 APPEND_STR(".shstrtab");
1096 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1097 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1098# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1099 pSymFile->aShdrs[i].sh_offset
1100 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1101# else
1102 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1103 pSymFile->aShdrs[i].sh_offset = 0;
1104# endif
1105 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1106 pSymFile->aShdrs[i].sh_link = 0;
1107 pSymFile->aShdrs[i].sh_info = 0;
1108 pSymFile->aShdrs[i].sh_addralign = 1;
1109 pSymFile->aShdrs[i].sh_entsize = 0;
1110 i++;
1111
1112 /* Section header: .symtab */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".symtab");
1115 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1124 i++;
1125
1126# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1127 /* Section header: .dynsym */
1128 pSymFile->aShdrs[i].sh_name = offStrTab;
1129 APPEND_STR(".dynsym");
1130 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1131 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1132 pSymFile->aShdrs[i].sh_offset
1133 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1134 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1135 pSymFile->aShdrs[i].sh_link = iShStrTab;
1136 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1137 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1138 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1139 i++;
1140# endif
1141
1142# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1143 /* Section header: .dynamic */
1144 pSymFile->aShdrs[i].sh_name = offStrTab;
1145 APPEND_STR(".dynamic");
1146 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1147 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1148 pSymFile->aShdrs[i].sh_offset
1149 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1150 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1151 pSymFile->aShdrs[i].sh_link = iShStrTab;
1152 pSymFile->aShdrs[i].sh_info = 0;
1153 pSymFile->aShdrs[i].sh_addralign = 1;
1154 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1155 i++;
1156# endif
1157
1158 /* Section header: .text */
1159 unsigned const iShText = i;
1160 pSymFile->aShdrs[i].sh_name = offStrTab;
1161 APPEND_STR(".text");
1162 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1163 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1164# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1165 pSymFile->aShdrs[i].sh_offset
1166 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1167# else
1168 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1169 pSymFile->aShdrs[i].sh_offset = 0;
1170# endif
1171 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1172 pSymFile->aShdrs[i].sh_link = 0;
1173 pSymFile->aShdrs[i].sh_info = 0;
1174 pSymFile->aShdrs[i].sh_addralign = 1;
1175 pSymFile->aShdrs[i].sh_entsize = 0;
1176 i++;
1177
1178 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1179
1180# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1181 /*
1182 * The program headers:
1183 */
1184 /* Everything in a single LOAD segment: */
1185 i = 0;
1186 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1187 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = 0;
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1193 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1194 i++;
1195 /* The .dynamic segment. */
1196 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1197 pSymFile->aPhdrs[i].p_flags = PF_R;
1198 pSymFile->aPhdrs[i].p_offset
1199 = pSymFile->aPhdrs[i].p_vaddr
1200 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1201 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1202 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1203 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1204 i++;
1205
1206 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1207
1208 /*
1209 * The dynamic section:
1210 */
1211 i = 0;
1212 pSymFile->aDyn[i].d_tag = DT_SONAME;
1213 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1214 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1223 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1224 i++;
1225 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1226 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1227 i++;
1228 pSymFile->aDyn[i].d_tag = DT_NULL;
1229 i++;
1230 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1231# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1232
1233 /*
1234 * Symbol tables:
1235 */
1236 /** @todo gdb doesn't seem to really like this ... */
1237 i = 0;
1238 pSymFile->aSymbols[i].st_name = 0;
1239 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1240 pSymFile->aSymbols[i].st_value = 0;
1241 pSymFile->aSymbols[i].st_size = 0;
1242 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1243 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1244# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1245 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1246# endif
1247 i++;
1248
1249 pSymFile->aSymbols[i].st_name = 0;
1250 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1251 pSymFile->aSymbols[i].st_value = 0;
1252 pSymFile->aSymbols[i].st_size = 0;
1253 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1254 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1255 i++;
1256
1257 pSymFile->aSymbols[i].st_name = offStrTab;
1258 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1259# if 0
1260 pSymFile->aSymbols[i].st_shndx = iShText;
1261 pSymFile->aSymbols[i].st_value = 0;
1262# else
1263 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1264 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1265# endif
1266 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1267 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1268 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1269# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1270 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1271 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1272# endif
1273 i++;
1274
1275 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1276 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1277
1278 /*
1279 * The GDB JIT entry and informing GDB.
1280 */
1281 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1282# if 1
1283 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1284# else
1285 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1286# endif
1287
1288 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1289 RTCritSectEnter(&g_IemNativeGdbJitLock);
1290 pEhFrame->GdbJitEntry.pNext = NULL;
1291 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1292 if (__jit_debug_descriptor.pTail)
1293 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1294 else
1295 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1296 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1297 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1298
1299 /* Notify GDB: */
1300 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1301 __jit_debug_register_code();
1302 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1303 RTCritSectLeave(&g_IemNativeGdbJitLock);
1304
1305# else /* !IEMNATIVE_USE_GDB_JIT */
1306 RT_NOREF(pVCpu);
1307# endif /* !IEMNATIVE_USE_GDB_JIT */
1308
1309 return VINF_SUCCESS;
1310}
1311
1312# endif /* !RT_OS_WINDOWS */
1313#endif /* IN_RING3 */
1314
1315
1316/**
1317 * Adds another chunk to the executable memory allocator.
1318 *
1319 * This is used by the init code for the initial allocation and later by the
1320 * regular allocator function when it's out of memory.
1321 */
1322static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1323{
1324 /* Check that we've room for growth. */
1325 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1326 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1327
1328 /* Allocate a chunk. */
1329#ifdef RT_OS_DARWIN
1330 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1331#else
1332 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1333#endif
1334 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1335
1336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1337 int rc = VINF_SUCCESS;
1338#else
1339 /* Initialize the heap for the chunk. */
1340 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1341 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1342 AssertRC(rc);
1343 if (RT_SUCCESS(rc))
1344 {
1345 /*
1346 * We want the memory to be aligned on 64 byte, so the first time thru
1347 * here we do some exploratory allocations to see how we can achieve this.
1348 * On subsequent runs we only make an initial adjustment allocation, if
1349 * necessary.
1350 *
1351 * Since we own the heap implementation, we know that the internal block
1352 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1353 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1354 * to the size, align up by 64 bytes, and subtract 32 bytes.
1355 *
1356 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1357 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1358 * allocation to force subsequent allocations to return 64 byte aligned
1359 * user areas.
1360 */
1361 if (!pExecMemAllocator->cbHeapBlockHdr)
1362 {
1363 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1364 pExecMemAllocator->cbHeapAlignTweak = 64;
1365 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1366 32 /*cbAlignment*/);
1367 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1368
1369 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1376 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1377 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1378 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1379 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1380
1381 RTHeapSimpleFree(hHeap, pvTest2);
1382 RTHeapSimpleFree(hHeap, pvTest1);
1383 }
1384 else
1385 {
1386 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1387 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1388 }
1389 if (RT_SUCCESS(rc))
1390#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1391 {
1392 /*
1393 * Add the chunk.
1394 *
1395 * This must be done before the unwind init so windows can allocate
1396 * memory from the chunk when using the alternative sub-allocator.
1397 */
1398 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1399#ifdef IN_RING3
1400 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1401#endif
1402#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1403 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1404#else
1405 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1406 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1407 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1408 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1409#endif
1410
1411 pExecMemAllocator->cChunks = idxChunk + 1;
1412 pExecMemAllocator->idxChunkHint = idxChunk;
1413
1414#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1415 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1416 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1417#else
1418 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1419 pExecMemAllocator->cbTotal += cbFree;
1420 pExecMemAllocator->cbFree += cbFree;
1421#endif
1422
1423#ifdef IN_RING3
1424 /*
1425 * Initialize the unwind information (this cannot really fail atm).
1426 * (This sets pvUnwindInfo.)
1427 */
1428 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1429 if (RT_SUCCESS(rc))
1430#endif
1431 {
1432 return VINF_SUCCESS;
1433 }
1434
1435#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1436 /* Just in case the impossible happens, undo the above: */
1437 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1438 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1439 pExecMemAllocator->cChunks = idxChunk;
1440 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1441 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1442 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1443 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1444#endif
1445 }
1446#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1447 }
1448#endif
1449 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1450 RT_NOREF(pVCpu);
1451 return rc;
1452}
1453
1454
1455/**
1456 * Initializes the executable memory allocator for native recompilation on the
1457 * calling EMT.
1458 *
1459 * @returns VBox status code.
1460 * @param pVCpu The cross context virtual CPU structure of the calling
1461 * thread.
1462 * @param cbMax The max size of the allocator.
1463 * @param cbInitial The initial allocator size.
1464 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1465 * dependent).
1466 */
1467int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1468{
1469 /*
1470 * Validate input.
1471 */
1472 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1473 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1474 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1475 || cbChunk == 0
1476 || ( RT_IS_POWER_OF_TWO(cbChunk)
1477 && cbChunk >= _1M
1478 && cbChunk <= _256M
1479 && cbChunk <= cbMax),
1480 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1481 VERR_OUT_OF_RANGE);
1482
1483 /*
1484 * Adjust/figure out the chunk size.
1485 */
1486 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1487 {
1488 if (cbMax >= _256M)
1489 cbChunk = _64M;
1490 else
1491 {
1492 if (cbMax < _16M)
1493 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1494 else
1495 cbChunk = (uint32_t)cbMax / 4;
1496 if (!RT_IS_POWER_OF_TWO(cbChunk))
1497 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1498 }
1499 }
1500
1501 if (cbChunk > cbMax)
1502 cbMax = cbChunk;
1503 else
1504 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1505 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1506 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
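    /* Worked example (illustration): cbMax = 40 MiB with cbChunk = 0 selects 40/4 = 10 MiB,
       which is rounded up to the next power of two, 16 MiB; cbMax is then rounded up to a
       whole number of chunks, 48 MiB, giving cMaxChunks = 3. */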
1507
1508 /*
1509     * Allocate and initialize the allocator instance.
1510 */
1511 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1512#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1513 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1514 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1515 cbNeeded += cbBitmap * cMaxChunks;
1516 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1517 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1518#endif
1519#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1520 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1521 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1522#endif
1523 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1524 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1525 VERR_NO_MEMORY);
1526 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1527 pExecMemAllocator->cbChunk = cbChunk;
1528 pExecMemAllocator->cMaxChunks = cMaxChunks;
1529 pExecMemAllocator->cChunks = 0;
1530 pExecMemAllocator->idxChunkHint = 0;
1531 pExecMemAllocator->cAllocations = 0;
1532 pExecMemAllocator->cbTotal = 0;
1533 pExecMemAllocator->cbFree = 0;
1534 pExecMemAllocator->cbAllocated = 0;
1535#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1536 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1537 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1538 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1539 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1540#endif
1541#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1542 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1543#endif
1544 for (uint32_t i = 0; i < cMaxChunks; i++)
1545 {
1546#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1547 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1548 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1549#else
1550 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1551#endif
1552 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1553#ifdef IN_RING0
1554 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1555#else
1556 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1557#endif
1558 }
1559 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1560
1561 /*
1562 * Do the initial allocations.
1563 */
1564    while ((uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk < cbInitial)
1565 {
1566 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1567 AssertLogRelRCReturn(rc, rc);
1568 }
1569
1570 pExecMemAllocator->idxChunkHint = 0;
1571
1572 return VINF_SUCCESS;
1573}
1574
1575
1576/*********************************************************************************************************************************
1577* Native Recompilation *
1578*********************************************************************************************************************************/
1579
1580
1581/**
1582 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1585{
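    /* Credit the instructions executed in the TB up to this call and translate the internal
       break status to plain success before applying the common status code fiddling. */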
1586 pVCpu->iem.s.cInstructions += idxInstr;
1587 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1588}
1589
1590
1591/**
1592 * Used by TB code when it wants to raise a \#GP(0).
1593 */
1594IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1595{
1596 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1597#ifndef _MSC_VER
1598 return VINF_IEM_RAISED_XCPT; /* not reached */
1599#endif
1600}
1601
1602
1603/**
1604 * Used by TB code when it wants to raise a \#NM.
1605 */
1606IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1607{
1608 iemRaiseDeviceNotAvailableJmp(pVCpu);
1609#ifndef _MSC_VER
1610 return VINF_IEM_RAISED_XCPT; /* not reached */
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code when it wants to raise a \#UD.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1619{
1620 iemRaiseUndefinedOpcodeJmp(pVCpu);
1621#ifndef _MSC_VER
1622 return VINF_IEM_RAISED_XCPT; /* not reached */
1623#endif
1624}
1625
1626
1627/**
1628 * Used by TB code when it wants to raise a \#MF.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1631{
1632 iemRaiseMathFaultJmp(pVCpu);
1633#ifndef _MSC_VER
1634 return VINF_IEM_RAISED_XCPT; /* not reached */
1635#endif
1636}
1637
1638
1639/**
1640 * Used by TB code when it wants to raise a \#XF.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1643{
1644 iemRaiseSimdFpExceptionJmp(pVCpu);
1645#ifndef _MSC_VER
1646 return VINF_IEM_RAISED_XCPT; /* not reached */
1647#endif
1648}
1649
1650
1651/**
1652 * Used by TB code when detecting opcode changes.
1653 * @see iemThreadeFuncWorkerObsoleteTb
1654 */
1655IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1656{
1657    /* We set fSafeToFree to false because we're being called in the context
1658       of a TB callback function, which for native TBs means we cannot release
1659       the executable memory until we've returned our way back to iemTbExec, as
1660       that return path goes via the native code generated for the TB. */
1661 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1662 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1663 return VINF_IEM_REEXEC_BREAK;
1664}
1665
1666
1667/**
1668 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1669 */
1670IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1671{
1672 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1673 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1674 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1675 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1676 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1677 return VINF_IEM_REEXEC_BREAK;
1678}
1679
1680
1681/**
1682 * Used by TB code when we missed a PC check after a branch.
1683 */
1684IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1685{
1686 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1687 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1688 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1689 pVCpu->iem.s.pbInstrBuf));
1690 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1691 return VINF_IEM_REEXEC_BREAK;
1692}
1693
1694
1695
1696/*********************************************************************************************************************************
1697* Helpers: Segmented memory fetches and stores. *
1698*********************************************************************************************************************************/
1699
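/*
 * Note: When IEMNATIVE_WITH_TLB_LOOKUP_FETCH / _STORE / _PUSH / _POP is defined, the TLB lookup
 *       is emitted inline in the TB code and these helpers only serve as the fallback path,
 *       hence the *SafeJmp variants; otherwise they do the whole job via the regular *Jmp variants.
 */
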
1700/**
1701 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1706 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1707#else
1708 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/**
1714 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1715 * to 16 bits.
1716 */
1717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1718{
1719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1720 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1721#else
1722 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1723#endif
1724}
1725
1726
1727/**
1728 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1729 * to 32 bits.
1730 */
1731IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1732{
1733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1734 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1735#else
1736 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1737#endif
1738}
1739
1740/**
1741 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1742 * to 64 bits.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1745{
1746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1747 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1748#else
1749 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1750#endif
1751}
1752
1753
1754/**
1755 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1760 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1761#else
1762 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1769 * to 32 bits.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1772{
1773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1774 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1775#else
1776 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1777#endif
1778}
1779
1780
1781/**
1782 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1783 * to 64 bits.
1784 */
1785IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1786{
1787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1788 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1789#else
1790 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1791#endif
1792}
1793
1794
1795/**
1796 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1799{
1800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1801 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1802#else
1803 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1804#endif
1805}
1806
1807
1808/**
1809 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1810 * to 64 bits.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1828 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1829#else
1830 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1831#endif
1832}
1833
1834
1835/**
1836 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1837 */
1838IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1839{
1840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1841 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1842#else
1843 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1844#endif
1845}
1846
1847
1848/**
1849 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1850 */
1851IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1852{
1853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1854 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1855#else
1856 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1857#endif
1858}
1859
1860
1861/**
1862 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1865{
1866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1867 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1868#else
1869 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1870#endif
1871}
1872
1873
1874/**
1875 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1880 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1881#else
1882 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1883#endif
1884}
1885
1886
1887
1888/**
1889 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1894 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1895#else
1896 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1897#endif
1898}
1899
1900
1901/**
1902 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1907 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1908#else
1909 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to store a 32-bit selector value onto a generic stack.
1916 *
1917 * Intel CPUs don't write a whole dword, hence the special function.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1922 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1923#else
1924 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1925#endif
1926}
1927
1928
1929/**
1930 * Used by TB code to push unsigned 64-bit value onto a generic stack.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1935 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1936#else
1937 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1948 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1949#else
1950 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1957 */
1958IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1959{
1960#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1961 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1962#else
1963 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1964#endif
1965}
1966
1967
1968/**
1969 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1970 */
1971IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1972{
1973#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1974 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1975#else
1976 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1977#endif
1978}
1979
1980
1981
1982/*********************************************************************************************************************************
1983* Helpers: Flat memory fetches and stores. *
1984*********************************************************************************************************************************/
1985
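/*
 * Note: UINT8_MAX is passed as the segment register index to the *SafeJmp workers below to
 *       indicate flat (unsegmented) addressing.
 */
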
1986/**
1987 * Used by TB code to load unsigned 8-bit data w/ flat address.
1988 * @note Zero extending the value to 64-bit to simplify assembly.
1989 */
1990IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1991{
1992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1993 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1994#else
1995 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1996#endif
1997}
1998
1999
2000/**
2001 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2002 * to 16 bits.
2003 * @note Zero extending the value to 64-bit to simplify assembly.
2004 */
2005IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2006{
2007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2009#else
2010 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2011#endif
2012}
2013
2014
2015/**
2016 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2017 * to 32 bits.
2018 * @note Zero extending the value to 64-bit to simplify assembly.
2019 */
2020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2021{
2022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2024#else
2025 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2026#endif
2027}
2028
2029
2030/**
2031 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2032 * to 64 bits.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2035{
2036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2037 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2038#else
2039 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2040#endif
2041}
2042
2043
2044/**
2045 * Used by TB code to load unsigned 16-bit data w/ flat address.
2046 * @note Zero extending the value to 64-bit to simplify assembly.
2047 */
2048IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2049{
2050#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2051 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2052#else
2053 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2054#endif
2055}
2056
2057
2058/**
2059 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2060 * to 32 bits.
2061 * @note Zero extending the value to 64-bit to simplify assembly.
2062 */
2063IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2064{
2065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2067#else
2068 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2069#endif
2070}
2071
2072
2073/**
2074 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2075 * to 64 bits.
2076 * @note Zero extending the value to 64-bit to simplify assembly.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2081 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2082#else
2083 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to load unsigned 32-bit data w/ flat address.
2090 * @note Zero extending the value to 64-bit to simplify assembly.
2091 */
2092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2093{
2094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2095 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2096#else
2097 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2098#endif
2099}
2100
2101
2102/**
2103 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2104 * to 64 bits.
2105 * @note Zero extending the value to 64-bit to simplify assembly.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2110 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2111#else
2112 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to load unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2123 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2124#else
2125 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2126#endif
2127}
2128
2129
2130/**
2131 * Used by TB code to store unsigned 8-bit data w/ flat address.
2132 */
2133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2134{
2135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2136 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2137#else
2138 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2139#endif
2140}
2141
2142
2143/**
2144 * Used by TB code to store unsigned 16-bit data w/ flat address.
2145 */
2146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2147{
2148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2149 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2150#else
2151 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2152#endif
2153}
2154
2155
2156/**
2157 * Used by TB code to store unsigned 32-bit data w/ flat address.
2158 */
2159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2160{
2161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2162 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2163#else
2164 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2165#endif
2166}
2167
2168
2169/**
2170 * Used by TB code to store unsigned 64-bit data w/ flat address.
2171 */
2172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2173{
2174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2175 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2176#else
2177 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2178#endif
2179}
2180
2181
2182
2183/**
2184 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2189 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2190#else
2191 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2202 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2203#else
2204 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2205#endif
2206}
2207
2208
2209/**
2210 * Used by TB code to store a segment selector value onto a flat stack.
2211 *
2212 * Intel CPUs don't write a whole dword, hence the special function.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2217 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2218#else
2219 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2220#endif
2221}
2222
2223
2224/**
2225 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2226 */
2227IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2228{
2229#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2230 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2231#else
2232 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2233#endif
2234}
2235
2236
2237/**
2238 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2239 */
2240IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2241{
2242#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2243 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2244#else
2245 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2246#endif
2247}
2248
2249
2250/**
2251 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2252 */
2253IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2254{
2255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2256 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2257#else
2258 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2259#endif
2260}
2261
2262
2263/**
2264 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2269 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2270#else
2271 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2272#endif
2273}
2274
2275
2276
2277/*********************************************************************************************************************************
2278* Helpers: Segmented memory mapping. *
2279*********************************************************************************************************************************/
2280
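/*
 * Note: These helpers return a host-side pointer for accessing the guest memory and record how
 *       to undo the mapping in *pbUnmapInfo, which is later handed to the commit-and-unmap
 *       helpers further down.
 */
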
2281/**
2282 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2283 * segmentation.
2284 */
2285IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2286 RTGCPTR GCPtrMem, uint8_t iSegReg))
2287{
2288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2289 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#else
2291 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#endif
2293}
2294
2295
2296/**
2297 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2298 */
2299IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2300 RTGCPTR GCPtrMem, uint8_t iSegReg))
2301{
2302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2303 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#else
2305 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#endif
2307}
2308
2309
2310/**
2311 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2312 */
2313IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2314 RTGCPTR GCPtrMem, uint8_t iSegReg))
2315{
2316#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2317 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2318#else
2319 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2320#endif
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2328 RTGCPTR GCPtrMem, uint8_t iSegReg))
2329{
2330#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2331 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2332#else
2333 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2334#endif
2335}
2336
2337
2338/**
2339 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2340 * segmentation.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2343 RTGCPTR GCPtrMem, uint8_t iSegReg))
2344{
2345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2346 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#else
2348 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#endif
2350}
2351
2352
2353/**
2354 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2355 */
2356IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2357 RTGCPTR GCPtrMem, uint8_t iSegReg))
2358{
2359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2360 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#else
2362 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#endif
2364}
2365
2366
2367/**
2368 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2369 */
2370IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2371 RTGCPTR GCPtrMem, uint8_t iSegReg))
2372{
2373#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2374 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2375#else
2376 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2377#endif
2378}
2379
2380
2381/**
2382 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2383 */
2384IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2385 RTGCPTR GCPtrMem, uint8_t iSegReg))
2386{
2387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2388 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2389#else
2390 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2391#endif
2392}
2393
2394
2395/**
2396 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2397 * segmentation.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2400 RTGCPTR GCPtrMem, uint8_t iSegReg))
2401{
2402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2403 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#else
2405 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#endif
2407}
2408
2409
2410/**
2411 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2412 */
2413IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2414 RTGCPTR GCPtrMem, uint8_t iSegReg))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2417 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#else
2419 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2426 */
2427IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2428 RTGCPTR GCPtrMem, uint8_t iSegReg))
2429{
2430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2431 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2432#else
2433 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2434#endif
2435}
2436
2437
2438/**
2439 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2440 */
2441IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2442 RTGCPTR GCPtrMem, uint8_t iSegReg))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2445 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2446#else
2447 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2454 * segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/**
2468 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2469 */
2470IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2471 RTGCPTR GCPtrMem, uint8_t iSegReg))
2472{
2473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2474 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#else
2476 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#endif
2478}
2479
2480
2481/**
2482 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2483 */
2484IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2485 RTGCPTR GCPtrMem, uint8_t iSegReg))
2486{
2487#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2488 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#else
2490 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#endif
2492}
2493
2494
2495/**
2496 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2497 */
2498IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2499 RTGCPTR GCPtrMem, uint8_t iSegReg))
2500{
2501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2502 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#else
2504 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#endif
2506}
2507
2508
2509/**
2510 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2511 */
2512IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2513 RTGCPTR GCPtrMem, uint8_t iSegReg))
2514{
2515#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2516 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2517#else
2518 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2519#endif
2520}
2521
2522
2523/**
2524 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2525 */
2526IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2527 RTGCPTR GCPtrMem, uint8_t iSegReg))
2528{
2529#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2530 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2531#else
2532 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2533#endif
2534}
2535
2536
2537/**
2538 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2539 * segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/**
2567 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2568 */
2569IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2570 RTGCPTR GCPtrMem, uint8_t iSegReg))
2571{
2572#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2573 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2574#else
2575 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2576#endif
2577}
2578
2579
2580/**
2581 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2582 */
2583IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2584 RTGCPTR GCPtrMem, uint8_t iSegReg))
2585{
2586#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2587 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2588#else
2589 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2590#endif
2591}
2592
2593
2594/*********************************************************************************************************************************
2595* Helpers: Flat memory mapping. *
2596*********************************************************************************************************************************/
2597
2598/**
2599 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2600 * address.
2601 */
2602IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2603{
2604#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2605 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2606#else
2607 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2608#endif
2609}
2610
2611
2612/**
2613 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2614 */
2615IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2616{
2617#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2618 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2619#else
2620 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2621#endif
2622}
2623
2624
2625/**
2626 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2627 */
2628IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2629{
2630#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2631 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2632#else
2633 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2634#endif
2635}
2636
2637
2638/**
2639 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2640 */
2641IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2642{
2643#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2644 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2645#else
2646 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2647#endif
2648}
2649
2650
2651/**
2652 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2653 * address.
2654 */
2655IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2656{
2657#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2658 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2659#else
2660 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2661#endif
2662}
2663
2664
2665/**
2666 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2667 */
2668IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2672#else
2673 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2682{
2683#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2684 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2685#else
2686 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2687#endif
2688}
2689
2690
2691/**
2692 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2693 */
2694IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2695{
2696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2697 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2698#else
2699 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2700#endif
2701}
2702
2703
2704/**
2705 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2706 * address.
2707 */
2708IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2709{
2710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2711 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2712#else
2713 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2714#endif
2715}
2716
2717
2718/**
2719 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2722{
2723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2724 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2725#else
2726 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2727#endif
2728}
2729
2730
2731/**
2732 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2733 */
2734IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2735{
2736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2737 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2738#else
2739 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2740#endif
2741}
2742
2743
2744/**
2745 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2746 */
2747IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2748{
2749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2750 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2751#else
2752 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2753#endif
2754}
2755
2756
2757/**
2758 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2759 * address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2786 */
2787IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2788{
2789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2790 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2791#else
2792 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2793#endif
2794}
2795
2796
2797/**
2798 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2799 */
2800IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2801{
2802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2803 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2804#else
2805 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2806#endif
2807}
2808
2809
2810/**
2811 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2812 */
2813IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2814{
2815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2816 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2817#else
2818 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2819#endif
2820}
2821
2822
2823/**
2824 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2825 */
2826IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2827{
2828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2829 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2830#else
2831 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2832#endif
2833}
2834
2835
2836/**
2837 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2838 * address.
2839 */
2840IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2841{
2842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2843 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2844#else
2845 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2846#endif
2847}
2848
2849
2850/**
2851 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2857#else
2858 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2867{
2868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2869 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2870#else
2871 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2872#endif
2873}
2874
2875
2876/**
2877 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2878 */
2879IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2880{
2881#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2882 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2883#else
2884 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2885#endif
2886}
2887
2888
2889/*********************************************************************************************************************************
2890* Helpers: Commit, rollback & unmap *
2891*********************************************************************************************************************************/
2892
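/*
 * Note: bUnmapInfo is the value the corresponding mapping helper stored in *pbUnmapInfo.
 */
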
2893/**
2894 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2895 */
2896IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2897{
2898 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2899}
2900
2901
2902/**
2903 * Used by TB code to commit and unmap a read-write memory mapping.
2904 */
2905IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2906{
2907 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2908}
2909
2910
2911/**
2912 * Used by TB code to commit and unmap a write-only memory mapping.
2913 */
2914IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2915{
2916 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2917}
2918
2919
2920/**
2921 * Used by TB code to commit and unmap a read-only memory mapping.
2922 */
2923IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2924{
2925 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2926}
2927
2928
2929/**
2930 * Reinitializes the native recompiler state.
2931 *
2932 * Called before starting a new recompile job.
2933 */
2934static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2935{
2936 pReNative->cLabels = 0;
2937 pReNative->bmLabelTypes = 0;
2938 pReNative->cFixups = 0;
2939#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2940 pReNative->pDbgInfo->cEntries = 0;
2941#endif
2942 pReNative->pTbOrg = pTb;
2943 pReNative->cCondDepth = 0;
2944 pReNative->uCondSeqNo = 0;
2945 pReNative->uCheckIrqSeqNo = 0;
2946 pReNative->uTlbSeqNo = 0;
2947
2948#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2949 pReNative->Core.offPc = 0;
2950 pReNative->Core.cInstrPcUpdateSkipped = 0;
2951#endif
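    /* Mark all fixed registers, and any register indexes the host doesn't have, as allocated. */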
2952 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2953#if IEMNATIVE_HST_GREG_COUNT < 32
2954 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2955#endif
2956 ;
2957 pReNative->Core.bmHstRegsWithGstShadow = 0;
2958 pReNative->Core.bmGstRegShadows = 0;
2959 pReNative->Core.bmVars = 0;
2960 pReNative->Core.bmStack = 0;
2961 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2962 pReNative->Core.u64ArgVars = UINT64_MAX;
2963
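    /* Single-instance label types are cached in aidxUniqueLabels; UINT32_MAX means not created yet. */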
2964 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2965 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2968 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2969 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2970 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2971 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2972 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2973 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2974 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2975 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2976 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2977 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2978
2979 /* Full host register reinit: */
2980 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2981 {
2982 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2983 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2984 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2985 }
2986
2987 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2988 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2989#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2990 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2991#endif
2992#ifdef IEMNATIVE_REG_FIXED_TMP0
2993 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2994#endif
2995#ifdef IEMNATIVE_REG_FIXED_TMP1
2996 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2997#endif
2998#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2999 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3000#endif
3001 );
3002 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3003 {
3004 fRegs &= ~RT_BIT_32(idxReg);
3005        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3006 }
3007
3008 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3009#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3010 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3011#endif
3012#ifdef IEMNATIVE_REG_FIXED_TMP0
3013 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3014#endif
3015#ifdef IEMNATIVE_REG_FIXED_TMP1
3016 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3017#endif
3018#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3019 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3020#endif
3021
3022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3023# ifdef RT_ARCH_ARM64
3024    /*
3025     * Arm64 only has 32 128-bit registers.  To support emulating 256-bit registers we statically pair
3026     * two real registers into one virtual register for now, leaving us with 16 256-bit registers.
3027     * We always pair v0 with v1, v2 with v3, etc., so the higher register of each pair is marked as fixed
3028     * here during init (the 0xaaaaaaaa mask below); the register allocator assumes it is always free when the lower one is picked.
3029     */
3030 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3031# else
3032 uint32_t const fFixedAdditional = 0;
3033# endif
3034
3035 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3036 | fFixedAdditional
3037# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3038 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3039# endif
3040 ;
3041 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3042 pReNative->Core.bmGstSimdRegShadows = 0;
3043 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3044 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3045
3046 /* Full host register reinit: */
3047 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3048 {
3049 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3050 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3051 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3052 }
3053
3054 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3055 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3056 {
3057 fRegs &= ~RT_BIT_32(idxReg);
3058 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3059 }
3060
3061#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3062 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3063#endif
3064
3065#endif
3066
3067 return pReNative;
3068}
3069
3070
3071/**
3072 * Allocates and initializes the native recompiler state.
3073 *
3074 * This is called the first time an EMT wants to recompile something.
3075 *
3076 * @returns Pointer to the new recompiler state.
3077 * @param pVCpu The cross context virtual CPU structure of the calling
3078 * thread.
3079 * @param pTb The TB that's about to be recompiled.
3080 * @thread EMT(pVCpu)
3081 */
3082static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3083{
3084 VMCPU_ASSERT_EMT(pVCpu);
3085
3086 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3087 AssertReturn(pReNative, NULL);
3088
3089 /*
3090 * Try allocate all the buffers and stuff we need.
3091 */
3092 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3093 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3094 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3095#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3096 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3097#endif
3098 if (RT_LIKELY( pReNative->pInstrBuf
3099 && pReNative->paLabels
3100 && pReNative->paFixups)
3101#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3102 && pReNative->pDbgInfo
3103#endif
3104 )
3105 {
3106 /*
3107 * Set the buffer & array sizes on success.
3108 */
3109 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3110 pReNative->cLabelsAlloc = _8K;
3111 pReNative->cFixupsAlloc = _16K;
3112#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3113 pReNative->cDbgInfoAlloc = _16K;
3114#endif
3115
3116 /* Other constant stuff: */
3117 pReNative->pVCpu = pVCpu;
3118
3119 /*
3120 * Done, just need to save it and reinit it.
3121 */
3122 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3123 return iemNativeReInit(pReNative, pTb);
3124 }
3125
3126 /*
3127 * Failed. Cleanup and return.
3128 */
3129 AssertFailed();
3130 RTMemFree(pReNative->pInstrBuf);
3131 RTMemFree(pReNative->paLabels);
3132 RTMemFree(pReNative->paFixups);
3133#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3134 RTMemFree(pReNative->pDbgInfo);
3135#endif
3136 RTMemFree(pReNative);
3137 return NULL;
3138}
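/*
 * Illustrative usage sketch (not compiled): a caller would typically reuse the
 * per-EMT state via iemNativeReInit() and only fall back to iemNativeInit()
 * the first time round; the surrounding recompiler entry point is assumed here.
 */
#if 0
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
    if (RT_LIKELY(pReNative))
        pReNative = iemNativeReInit(pReNative, pTb);
    else
    {
        pReNative = iemNativeInit(pVCpu, pTb);
        AssertReturn(pReNative, NULL); /* out of memory; the real caller's error handling may differ */
    }
#endif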
3139
3140
3141/**
3142 * Creates a label.
3143 *
3144 * If the label does not yet have a defined position,
3145 * call iemNativeLabelDefine() later to set it.
3146 *
3147 * @returns Label ID. Throws VBox status code on failure, so no need to check
3148 * the return value.
3149 * @param pReNative The native recompile state.
3150 * @param enmType The label type.
3151 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3152 * label is not yet defined (default).
3153 * @param uData Data associated with the label. Only applicable to
3154 * certain types of labels. Default is zero.
3155 */
3156DECL_HIDDEN_THROW(uint32_t)
3157iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3158 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3159{
3160 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3161
3162 /*
3163 * Locate existing label definition.
3164 *
3165 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3166 * and uData is zero.
3167 */
3168 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3169 uint32_t const cLabels = pReNative->cLabels;
3170 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3171#ifndef VBOX_STRICT
3172 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3173 && offWhere == UINT32_MAX
3174 && uData == 0
3175#endif
3176 )
3177 {
3178#ifndef VBOX_STRICT
3179 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3180 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3181 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3182 if (idxLabel < pReNative->cLabels)
3183 return idxLabel;
3184#else
3185 for (uint32_t i = 0; i < cLabels; i++)
3186 if ( paLabels[i].enmType == enmType
3187 && paLabels[i].uData == uData)
3188 {
3189 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3190 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3191 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3192 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3193 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3194 return i;
3195 }
3196 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3197 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3198#endif
3199 }
3200
3201 /*
3202 * Make sure we've got room for another label.
3203 */
3204 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3205 { /* likely */ }
3206 else
3207 {
3208 uint32_t cNew = pReNative->cLabelsAlloc;
3209 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3210 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3211 cNew *= 2;
3212 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3213 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3214 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3215 pReNative->paLabels = paLabels;
3216 pReNative->cLabelsAlloc = cNew;
3217 }
3218
3219 /*
3220 * Define a new label.
3221 */
3222 paLabels[cLabels].off = offWhere;
3223 paLabels[cLabels].enmType = enmType;
3224 paLabels[cLabels].uData = uData;
3225 pReNative->cLabels = cLabels + 1;
3226
3227 Assert((unsigned)enmType < 64);
3228 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3229
3230 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3231 {
3232 Assert(uData == 0);
3233 pReNative->aidxUniqueLabels[enmType] = cLabels;
3234 }
3235
3236 if (offWhere != UINT32_MAX)
3237 {
3238#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3239 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3240 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3241#endif
3242 }
3243 return cLabels;
3244}
3245
3246
3247/**
3248 * Defines the location of an existing label.
3249 *
3250 * @param pReNative The native recompile state.
3251 * @param idxLabel The label to define.
3252 * @param offWhere The position.
3253 */
3254DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3255{
3256 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3257 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3258 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3259 pLabel->off = offWhere;
3260#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3261 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3262 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3263#endif
3264}
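/*
 * Illustrative usage sketch (not compiled): the create-then-define pattern for
 * a forward label; enmLabelType stands for whichever IEMNATIVELABELTYPE value
 * the caller needs.
 */
#if 0
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType); /* offWhere defaults to UINT32_MAX */
    /* ... emit a branch to the label, recording a fixup via iemNativeAddFixup() ... */
    iemNativeLabelDefine(pReNative, idxLabel, off);                          /* pin it once the position is known */
#endif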
3265
3266
3267/**
3268 * Looks up a label.
3269 *
3270 * @returns Label ID if found, UINT32_MAX if not.
3271 */
3272static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3273 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3274{
3275 Assert((unsigned)enmType < 64);
3276 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3277 {
3278 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3279 return pReNative->aidxUniqueLabels[enmType];
3280
3281 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3282 uint32_t const cLabels = pReNative->cLabels;
3283 for (uint32_t i = 0; i < cLabels; i++)
3284 if ( paLabels[i].enmType == enmType
3285 && paLabels[i].uData == uData
3286 && ( paLabels[i].off == offWhere
3287 || offWhere == UINT32_MAX
3288 || paLabels[i].off == UINT32_MAX))
3289 return i;
3290 }
3291 return UINT32_MAX;
3292}
3293
3294
3295/**
3296 * Adds a fixup.
3297 *
3298 * @throws VBox status code (int) on failure.
3299 * @param pReNative The native recompile state.
3300 * @param offWhere The instruction offset of the fixup location.
3301 * @param idxLabel The target label ID for the fixup.
3302 * @param enmType The fixup type.
3303 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3304 */
3305DECL_HIDDEN_THROW(void)
3306iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3307 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3308{
3309 Assert(idxLabel <= UINT16_MAX);
3310 Assert((unsigned)enmType <= UINT8_MAX);
3311
3312 /*
3313 * Make sure we've got room.
3314 */
3315 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3316 uint32_t const cFixups = pReNative->cFixups;
3317 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3318 { /* likely */ }
3319 else
3320 {
3321 uint32_t cNew = pReNative->cFixupsAlloc;
3322 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3323 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3324 cNew *= 2;
3325 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3326 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3327 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3328 pReNative->paFixups = paFixups;
3329 pReNative->cFixupsAlloc = cNew;
3330 }
3331
3332 /*
3333 * Add the fixup.
3334 */
3335 paFixups[cFixups].off = offWhere;
3336 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3337 paFixups[cFixups].enmType = enmType;
3338 paFixups[cFixups].offAddend = offAddend;
3339 pReNative->cFixups = cFixups + 1;
3340}
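/*
 * Illustrative usage sketch (not compiled): recording a fixup for a branch
 * whose target label is not defined yet.  The fixup type name used below is
 * assumed; it is not part of this excerpt.
 */
#if 0
    uint32_t const idxLabel  = iemNativeLabelCreate(pReNative, enmLabelType);   /* forward declared */
    uint32_t const offBranch = off;                                             /* offset of the branch to patch later */
    /* ... emit the branch instruction with a placeholder displacement ... */
    iemNativeAddFixup(pReNative, offBranch, idxLabel, kIemNativeFixupType_Rel32 /* assumed name */);
#endif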
3341
3342
3343/**
3344 * Slow code path for iemNativeInstrBufEnsure.
3345 */
3346DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3347{
3348 /* Double the buffer size till we meet the request. */
3349 uint32_t cNew = pReNative->cInstrBufAlloc;
3350 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3351 do
3352 cNew *= 2;
3353 while (cNew < off + cInstrReq);
3354
3355 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3356#ifdef RT_ARCH_ARM64
3357 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3358#else
3359 uint32_t const cbMaxInstrBuf = _2M;
3360#endif
3361 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3362
3363 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3364 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3365
3366#ifdef VBOX_STRICT
3367 pReNative->offInstrBufChecked = off + cInstrReq;
3368#endif
3369 pReNative->cInstrBufAlloc = cNew;
3370 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3371}
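/*
 * Illustrative usage sketch (not compiled): emitters are assumed to go through
 * the inlined iemNativeInstrBufEnsure() wrapper, which only takes the slow
 * path above when the buffer actually needs to grow.
 */
#if 0
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4 /*cInstrReq*/);
    /* ... write at most 4 IEMNATIVEINSTR units at pCodeBuf[off], advancing off ... */
#endif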
3372
3373#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3374
3375/**
3376 * Grows the static debug info array used during recompilation.
3377 *
3378 * @returns Pointer to the new debug info block; throws VBox status code on
3379 * failure, so no need to check the return value.
3380 */
3381DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3382{
3383 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3384 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3385 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3386 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3387 pReNative->pDbgInfo = pDbgInfo;
3388 pReNative->cDbgInfoAlloc = cNew;
3389 return pDbgInfo;
3390}
3391
3392
3393/**
3394 * Adds a new uninitialized debug info entry, returning the pointer to it.
3395 */
3396DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3397{
3398 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3399 { /* likely */ }
3400 else
3401 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3402 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3403}
3404
3405
3406/**
3407 * Debug Info: Adds a native offset record, if necessary.
3408 */
3409static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3410{
3411 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3412
3413 /*
3414 * Search backwards to see if we've got a similar record already.
3415 */
3416 uint32_t idx = pDbgInfo->cEntries;
3417 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3418 while (idx-- > idxStop)
3419 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3420 {
3421 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3422 return;
3423 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3424 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3425 break;
3426 }
3427
3428 /*
3429 * Add it.
3430 */
3431 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3432 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3433 pEntry->NativeOffset.offNative = off;
3434}
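/*
 * Illustrative sketch (not compiled): detail records are preceded by a native
 * offset record so they can be correlated with the emitted code, e.g.:
 */
#if 0
    iemNativeDbgInfoAddNativeOffset(pReNative, off);
    iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
#endif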
3435
3436
3437/**
3438 * Debug Info: Record info about a label.
3439 */
3440static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3441{
3442 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3443 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3444 pEntry->Label.uUnused = 0;
3445 pEntry->Label.enmLabel = (uint8_t)enmType;
3446 pEntry->Label.uData = uData;
3447}
3448
3449
3450/**
3451 * Debug Info: Record info about a threaded call.
3452 */
3453static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3454{
3455 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3456 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3457 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3458 pEntry->ThreadedCall.uUnused = 0;
3459 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3460}
3461
3462
3463/**
3464 * Debug Info: Record info about a new guest instruction.
3465 */
3466static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3467{
3468 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3469 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3470 pEntry->GuestInstruction.uUnused = 0;
3471 pEntry->GuestInstruction.fExec = fExec;
3472}
3473
3474
3475/**
3476 * Debug Info: Record info about guest register shadowing.
3477 */
3478static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3479 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3480{
3481 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3482 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3483 pEntry->GuestRegShadowing.uUnused = 0;
3484 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3485 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3486 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3487}
3488
3489
3490# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3491/**
3492 * Debug Info: Record info about guest register shadowing.
3493 */
3494static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3495 uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX)
3496{
3497 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3498 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3499 pEntry->GuestSimdRegShadowing.uUnused = 0;
3500 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3501 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3502 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3503}
3504# endif
3505
3506
3507# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3508/**
3509 * Debug Info: Record info about delayed RIP updates.
3510 */
3511static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3512{
3513 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3514 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3515 pEntry->DelayedPcUpdate.offPc = offPc;
3516 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3517}
3518# endif
3519
3520#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3521
3522
3523/*********************************************************************************************************************************
3524* Register Allocator *
3525*********************************************************************************************************************************/
3526
3527/**
3528 * Register parameter indexes (indexed by argument number).
3529 */
3530DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3531{
3532 IEMNATIVE_CALL_ARG0_GREG,
3533 IEMNATIVE_CALL_ARG1_GREG,
3534 IEMNATIVE_CALL_ARG2_GREG,
3535 IEMNATIVE_CALL_ARG3_GREG,
3536#if defined(IEMNATIVE_CALL_ARG4_GREG)
3537 IEMNATIVE_CALL_ARG4_GREG,
3538# if defined(IEMNATIVE_CALL_ARG5_GREG)
3539 IEMNATIVE_CALL_ARG5_GREG,
3540# if defined(IEMNATIVE_CALL_ARG6_GREG)
3541 IEMNATIVE_CALL_ARG6_GREG,
3542# if defined(IEMNATIVE_CALL_ARG7_GREG)
3543 IEMNATIVE_CALL_ARG7_GREG,
3544# endif
3545# endif
3546# endif
3547#endif
3548};
3549
3550/**
3551 * Call register masks indexed by argument count.
3552 */
3553DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3554{
3555 0,
3556 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3557 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3559 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3560 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3561#if defined(IEMNATIVE_CALL_ARG4_GREG)
3562 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3563 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3564# if defined(IEMNATIVE_CALL_ARG5_GREG)
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3567# if defined(IEMNATIVE_CALL_ARG6_GREG)
3568 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3569 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3570 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3571# if defined(IEMNATIVE_CALL_ARG7_GREG)
3572 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3573 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3574 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3575# endif
3576# endif
3577# endif
3578#endif
3579};
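/*
 * Illustrative sketch (not compiled): how the two tables above are indexed
 * when setting up a helper call.
 */
#if 0
    uint8_t  const idxArg2Reg = g_aidxIemNativeCallRegs[2]; /* host register carrying call argument #2 */
    uint32_t const fArgsMask  = g_afIemNativeCallRegs[3];   /* mask covering the registers of a 3 argument call */
#endif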
3580
3581#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3582/**
3583 * BP offset of the stack argument slots.
3584 *
3585 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3586 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3587 */
3588DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3589{
3590 IEMNATIVE_FP_OFF_STACK_ARG0,
3591# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3592 IEMNATIVE_FP_OFF_STACK_ARG1,
3593# endif
3594# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3595 IEMNATIVE_FP_OFF_STACK_ARG2,
3596# endif
3597# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3598 IEMNATIVE_FP_OFF_STACK_ARG3,
3599# endif
3600};
3601AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3602#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3603
3604/**
3605 * Info about shadowed guest register values.
3606 * @see IEMNATIVEGSTREG
3607 */
3608static struct
3609{
3610 /** Offset in VMCPU. */
3611 uint32_t off;
3612 /** The field size. */
3613 uint8_t cb;
3614 /** Name (for logging). */
3615 const char *pszName;
3616} const g_aGstShadowInfo[] =
3617{
3618#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3620 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3621 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3622 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3623 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3624 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3625 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3626 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3627 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3628 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3629 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3630 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3631 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3632 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3633 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3634 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3635 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3636 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3637 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3638 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3639 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3640 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3641 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3642 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3643 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3644 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3645 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3646 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3647 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3648 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3649 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3650 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3651 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3652 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3653 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3654 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3655 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3656 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3657 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3658 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3659 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3660 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3661 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3662 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3663 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3664 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3665 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3666 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3667#undef CPUMCTX_OFF_AND_SIZE
3668};
3669AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
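/*
 * Illustrative sketch (not compiled): locating a shadowed guest register field
 * inside VMCPU using the table above (the actual load/store emitters live
 * elsewhere in this file).
 */
#if 0
    uint32_t const offVCpu = g_aGstShadowInfo[enmGstReg].off; /* byte offset of the field within VMCPU */
    uint8_t  const cbField = g_aGstShadowInfo[enmGstReg].cb;  /* field size in bytes (2, 4 or 8) */
    Log12(("guest %s lives at VMCPU+%#x (LB %u)\n", g_aGstShadowInfo[enmGstReg].pszName, offVCpu, cbField));
#endif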
3670
3671
3672/** Host CPU general purpose register names. */
3673DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3674{
3675#ifdef RT_ARCH_AMD64
3676 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3677#elif defined(RT_ARCH_ARM64)
3678 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3679 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3680#else
3681# error "port me"
3682#endif
3683};
3684
3685
3686DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3687 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3688{
3689 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3690
3691 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3692 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3693 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3694 return (uint8_t)idxReg;
3695}
3696
3697
3698#if 0 /* unused */
3699/**
3700 * Tries to locate a suitable register in the given register mask.
3701 *
3702 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3703 * failed.
3704 *
3705 * @returns Host register number on success, returns UINT8_MAX on failure.
3706 */
3707static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3708{
3709 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3710 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3711 if (fRegs)
3712 {
3713 /** @todo pick better here: */
3714 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3715
3716 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3717 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3718 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3719 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3720
3721 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3722 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3723 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3724 return idxReg;
3725 }
3726 return UINT8_MAX;
3727}
3728#endif /* unused */
3729
3730
3731/**
3732 * Locate a register, possibly freeing one up.
3733 *
3734 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3735 * failed.
3736 *
3737 * @returns Host register number on success. Returns UINT8_MAX if no register was
3738 * found; the caller is supposed to deal with this and raise an
3739 * allocation-type specific status code (if desired).
3740 *
3741 * @throws VBox status code if we run into trouble spilling a variable or
3742 * recording debug info. Does NOT throw anything if we're out of
3743 * registers, though.
3744 */
3745static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3746 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3747{
3748 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3749 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3750 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3751
3752 /*
3753 * Try a free register that's shadowing a guest register.
3754 */
3755 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3756 if (fRegs)
3757 {
3758 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3759
3760#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3761 /*
3762 * When we have liveness information, we use it to kick out all shadowed
3763 * guest registers that will not be needed any more in this TB. If we're
3764 * lucky, this may prevent us from ending up here again.
3765 *
3766 * Note! We must consider the previous entry here so we don't free
3767 * anything that the current threaded function requires (current
3768 * entry is produced by the next threaded function).
3769 */
3770 uint32_t const idxCurCall = pReNative->idxCurCall;
3771 if (idxCurCall > 0)
3772 {
3773 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3774
3775# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3776 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3777 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3778 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3779# else
3780 /* Construct a mask of the registers not in the read or write state.
3781 Note! We could skip writes, if they aren't from us, as this is just
3782 a hack to prevent trashing registers that have just been written
3783 or will be written when we retire the current instruction. */
3784 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3785 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3786 & IEMLIVENESSBIT_MASK;
3787# endif
3788 /* Merge EFLAGS. */
3789 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3790 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3791 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3792 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3793 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3794
3795 /* If it matches any shadowed registers. */
3796 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3797 {
3798 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3799 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3800 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3801
3802 /* See if we've got any unshadowed registers we can return now. */
3803 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3804 if (fUnshadowedRegs)
3805 {
3806 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3807 return (fPreferVolatile
3808 ? ASMBitFirstSetU32(fUnshadowedRegs)
3809 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3810 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3811 - 1;
3812 }
3813 }
3814 }
3815#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3816
3817 unsigned const idxReg = (fPreferVolatile
3818 ? ASMBitFirstSetU32(fRegs)
3819 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3820 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3821 - 1;
3822
3823 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3824 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3825 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3826 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3827
3828 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3829 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3830 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3831 return idxReg;
3832 }
3833
3834 /*
3835 * Try to free up a variable that's in a register.
3836 *
3837 * We do two rounds here, first evacuating variables that don't need to be
3838 * saved on the stack, then in the second round moving things to the stack.
3839 */
3840 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3841 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3842 {
3843 uint32_t fVars = pReNative->Core.bmVars;
3844 while (fVars)
3845 {
3846 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3847 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3848 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3849 && (RT_BIT_32(idxReg) & fRegMask)
3850 && ( iLoop == 0
3851 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3852 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3853 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3854 {
3855 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3856 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3857 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3858 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3859 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3860 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3861
3862 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3863 {
3864 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3865 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3866 }
3867
3868 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3869 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3870
3871 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3872 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3873 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3874 return idxReg;
3875 }
3876 fVars &= ~RT_BIT_32(idxVar);
3877 }
3878 }
3879
3880 return UINT8_MAX;
3881}
3882
3883
3884/**
3885 * Reassigns a variable to a different register specified by the caller.
3886 *
3887 * @returns The new code buffer position.
3888 * @param pReNative The native recompile state.
3889 * @param off The current code buffer position.
3890 * @param idxVar The variable index.
3891 * @param idxRegOld The old host register number.
3892 * @param idxRegNew The new host register number.
3893 * @param pszCaller The caller for logging.
3894 */
3895static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3896 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3897{
3898 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3899 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3900 RT_NOREF(pszCaller);
3901
3902 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3903
3904 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3905 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3906 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3908
3909 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3910 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3911 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3912 if (fGstRegShadows)
3913 {
3914 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3915 | RT_BIT_32(idxRegNew);
3916 while (fGstRegShadows)
3917 {
3918 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3919 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3920
3921 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3922 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3923 }
3924 }
3925
3926 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3927 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3928 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3929 return off;
3930}
3931
3932
3933/**
3934 * Moves a variable to a different register or spills it onto the stack.
3935 *
3936 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3937 * kinds can easily be recreated if needed later.
3938 *
3939 * @returns The new code buffer position.
3940 * @param pReNative The native recompile state.
3941 * @param off The current code buffer position.
3942 * @param idxVar The variable index.
3943 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3944 * call-volatile registers.
3945 */
3946static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3947 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3948{
3949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3950 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3951 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3952 Assert(!pVar->fRegAcquired);
3953
3954 uint8_t const idxRegOld = pVar->idxReg;
3955 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3956 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3957 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3958 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3959 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3960 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3961 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3962 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3963
3964
3965 /** @todo Add statistics on this.*/
3966 /** @todo Implement basic variable liveness analysis (python) so variables
3967 * can be freed immediately once no longer used. Without this we risk
3968 * trashing registers and stack slots for dead variables.
3969 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3970
3971 /*
3972 * First try to move it to a different register, as that's cheaper.
3973 */
3974 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3975 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3976 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3977 if (fRegs)
3978 {
3979 /* Avoid using shadow registers, if possible. */
3980 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3981 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3982 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3983 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3984 }
3985
3986 /*
3987 * Otherwise we must spill the register onto the stack.
3988 */
3989 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3990 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3991 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3992 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3993
3994 pVar->idxReg = UINT8_MAX;
3995 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3996 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3997 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3998 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3999 return off;
4000}
4001
4002
4003/**
4004 * Allocates a temporary host general purpose register.
4005 *
4006 * This may emit code to save register content onto the stack in order to free
4007 * up a register.
4008 *
4009 * @returns The host register number; throws VBox status code on failure,
4010 * so no need to check the return value.
4011 * @param pReNative The native recompile state.
4012 * @param poff Pointer to the variable with the code buffer position.
4013 * This will be update if we need to move a variable from
4014 * This will be updated if we need to move a variable from
4015 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4016 * registers (@c true, default) or the other way around
4017 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4018 */
4019DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4020{
4021 /*
4022 * Try to find a completely unused register, preferably a call-volatile one.
4023 */
4024 uint8_t idxReg;
4025 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4026 & ~pReNative->Core.bmHstRegsWithGstShadow
4027 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4028 if (fRegs)
4029 {
4030 if (fPreferVolatile)
4031 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4032 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4033 else
4034 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4035 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4036 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4037 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4038 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4039 }
4040 else
4041 {
4042 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4043 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4044 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4045 }
4046 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4047}
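/*
 * Illustrative usage sketch (not compiled): allocate a temporary, emit code
 * using it, then release it.  The release helper name (iemNativeRegFreeTmp)
 * is assumed; it is not part of this excerpt.
 */
#if 0
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    /* ... emit code that clobbers idxTmpReg ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg); /* assumed counterpart */
#endif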
4048
4049
4050/**
4051 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
4052 * registers.
4053 *
4054 * @returns The host register number; throws VBox status code on failure,
4055 * so no need to check the return value.
4056 * @param pReNative The native recompile state.
4057 * @param poff Pointer to the variable with the code buffer position.
4058 * This will be updated if we need to move a variable from
4059 * register to stack in order to satisfy the request.
4060 * @param fRegMask Mask of acceptable registers.
4061 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4062 * registers (@c true, default) or the other way around
4063 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4064 */
4065DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4066 bool fPreferVolatile /*= true*/)
4067{
4068 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4069 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4070
4071 /*
4072 * Try to find a completely unused register, preferably a call-volatile one.
4073 */
4074 uint8_t idxReg;
4075 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4076 & ~pReNative->Core.bmHstRegsWithGstShadow
4077 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4078 & fRegMask;
4079 if (fRegs)
4080 {
4081 if (fPreferVolatile)
4082 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4083 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4084 else
4085 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4086 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4087 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4088 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4089 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4090 }
4091 else
4092 {
4093 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4094 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4095 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4096 }
4097 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4098}
4099
4100
4101/**
4102 * Allocates a temporary register for loading an immediate value into.
4103 *
4104 * This will emit code to load the immediate, unless there happens to be an
4105 * unused register with the value already loaded.
4106 *
4107 * The caller will not modify the returned register, it must be considered
4108 * read-only. Free using iemNativeRegFreeTmpImm.
4109 *
4110 * @returns The host register number; throws VBox status code on failure, so no
4111 * need to check the return value.
4112 * @param pReNative The native recompile state.
4113 * @param poff Pointer to the variable with the code buffer position.
4114 * @param uImm The immediate value that the register must hold upon
4115 * return.
4116 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4117 * registers (@c true, default) or the other way around
4118 * (@c false).
4119 *
4120 * @note Reusing immediate values has not been implemented yet.
4121 */
4122DECL_HIDDEN_THROW(uint8_t)
4123iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4124{
4125 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4126 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4127 return idxReg;
4128}
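/*
 * Illustrative usage sketch (not compiled): loading a constant into a
 * read-only temporary and freeing it with the documented counterpart.
 */
#if 0
    uint8_t const idxConstReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
    /* ... emit code that only reads idxConstReg ... */
    iemNativeRegFreeTmpImm(pReNative, idxConstReg);
#endif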
4129
4130#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4131
4132# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4133/**
4134 * Helper for iemNativeLivenessGetStateByGstReg.
4135 *
4136 * @returns IEMLIVENESS_STATE_XXX
4137 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4138 * ORed together.
4139 */
4140DECL_FORCE_INLINE(uint32_t)
4141iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4142{
4143 /* INPUT trumps anything else. */
4144 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4145 return IEMLIVENESS_STATE_INPUT;
4146
4147 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4148 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4149 {
4150 /* If not all sub-fields are clobbered they must be considered INPUT. */
4151 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4152 return IEMLIVENESS_STATE_INPUT;
4153 return IEMLIVENESS_STATE_CLOBBERED;
4154 }
4155
4156 /* XCPT_OR_CALL trumps UNUSED. */
4157 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4158 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4159
4160 return IEMLIVENESS_STATE_UNUSED;
4161}
4162# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4163
4164
4165DECL_FORCE_INLINE(uint32_t)
4166iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4167{
4168# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4169 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4170 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4171# else
4172 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4173 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4174 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4175 | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 2) & 8);
4176# endif
4177}
4178
4179
4180DECL_FORCE_INLINE(uint32_t)
4181iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4182{
4183 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4184 if (enmGstReg == kIemNativeGstReg_EFlags)
4185 {
4186 /* Merge the eflags states to one. */
4187# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4188 uRet = RT_BIT_32(uRet);
4189 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4190 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4191 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4192 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4193 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4194 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4195 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4196# else
4197 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4198 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4199 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4200 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4201 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4202 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4203 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4204# endif
4205 }
4206 return uRet;
4207}
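/*
 * Illustrative note: in the compact layout the per-register state is simply
 * Bit0 | (Bit1 << 1), so Bit0=1/Bit1=0 decodes to IEMLIVENESS_STATE_UNUSED (1)
 * and Bit0=0/Bit1=1 to IEMLIVENESS_STATE_XCPT_OR_CALL (2), matching the
 * AssertCompile in iemNativeRegAllocFindFree() above.
 */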
4208
4209
4210# ifdef VBOX_STRICT
4211/** For assertions only; the caller must check that idxCurCall isn't zero. */
4212DECL_FORCE_INLINE(uint32_t)
4213iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4214{
4215 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4216}
4217# endif /* VBOX_STRICT */
4218
4219#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4220
4221/**
4222 * Marks host register @a idxHstReg as containing a shadow copy of guest
4223 * register @a enmGstReg.
4224 *
4225 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4226 * host register before calling.
4227 */
4228DECL_FORCE_INLINE(void)
4229iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4230{
4231 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4232 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4233 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4234
4235 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4236 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4237 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4238 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4239#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4240 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4241 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4242#else
4243 RT_NOREF(off);
4244#endif
4245}
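/*
 * Illustrative usage sketch (not compiled): after emitting the load of the
 * guest value into idxReg, the shadow bookkeeping is updated.  The 64-bit
 * load emitter name below is assumed; it is not part of this excerpt.
 */
#if 0
    off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxReg, g_aGstShadowInfo[enmGstReg].off); /* assumed emitter */
    iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, off);
#endif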
4246
4247
4248/**
4249 * Clear any guest register shadow claims from @a idxHstReg.
4250 *
4251 * The register does not need to be shadowing any guest registers.
4252 */
4253DECL_FORCE_INLINE(void)
4254iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4255{
4256 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4257 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4258 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4259 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4260 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4261
4262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4263 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4264 if (fGstRegs)
4265 {
4266 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4267 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4268 while (fGstRegs)
4269 {
4270 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4271 fGstRegs &= ~RT_BIT_64(iGstReg);
4272 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4273 }
4274 }
4275#else
4276 RT_NOREF(off);
4277#endif
4278
4279 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4281 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4282}
4283
4284
4285/**
4286 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4287 * and global overview flags.
4288 */
4289DECL_FORCE_INLINE(void)
4290iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4291{
4292 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4293 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4294 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4295 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4296 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4297 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4298 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4299
4300#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4301 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4302 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4303#else
4304 RT_NOREF(off);
4305#endif
4306
4307 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4308 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4309 if (!fGstRegShadowsNew)
4310 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4311 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4312}
4313
4314
4315#if 0 /* unused */
4316/**
4317 * Clear any guest register shadow claim for @a enmGstReg.
4318 */
4319DECL_FORCE_INLINE(void)
4320iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4321{
4322 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4323 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4324 {
4325 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4326 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4327 }
4328}
4329#endif
4330
4331
4332/**
4333 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4334 * as the new shadow of it.
4335 *
4336 * Unlike the other guest reg shadow helpers, this does the logging for you.
4337 * However, the liveness state is not asserted here; the caller must do
4338 * that.
4339 */
4340DECL_FORCE_INLINE(void)
4341iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4342 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4343{
4344 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4345 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4346 {
4347 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4348 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4349 if (idxHstRegOld == idxHstRegNew)
4350 return;
4351 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4352 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4353 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4354 }
4355 else
4356 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4357 g_aGstShadowInfo[enmGstReg].pszName));
4358 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4359}
4360
4361
4362/**
4363 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4364 * to @a idxRegTo.
4365 */
4366DECL_FORCE_INLINE(void)
4367iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4368 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4369{
4370 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4371 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4372 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4373 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4374 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4375 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4376 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4377 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4378 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4379
4380 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4381 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4382 if (!fGstRegShadowsFrom)
4383 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4384 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4385 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4386 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4387#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4388 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4389 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4390#else
4391 RT_NOREF(off);
4392#endif
4393}
4394
4395
4396/**
4397 * Allocates a temporary host general purpose register for keeping a guest
4398 * register value.
4399 *
4400 * Since we may already have a register holding the guest register value,
4401 * code will be emitted to do the loading if that's not the case. Code may also
4402 * be emitted if we have to free up a register to satisfy the request.
4403 *
4404 * @returns The host register number; throws VBox status code on failure, so no
4405 * need to check the return value.
4406 * @param pReNative The native recompile state.
4407 * @param poff Pointer to the variable with the code buffer
4408 * position. This will be updated if we need to move a
4409 * variable from register to stack in order to satisfy
4410 * the request.
4411 * @param enmGstReg The guest register that is to be updated.
4412 * @param enmIntendedUse How the caller will be using the host register.
4413 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4414 * register is okay (default). The ASSUMPTION here is
4415 * that the caller has already flushed all volatile
4416 * registers, so this is only applied if we allocate a
4417 * new register.
4418 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4419 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4420 */
4421DECL_HIDDEN_THROW(uint8_t)
4422iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4423 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4424 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4425{
4426 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4427#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4428 AssertMsg( fSkipLivenessAssert
4429 || pReNative->idxCurCall == 0
4430 || enmGstReg == kIemNativeGstReg_Pc
4431 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4432 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4433 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4434 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4435 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4436 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4437#endif
4438 RT_NOREF(fSkipLivenessAssert);
4439#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4440 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4441#endif
4442 uint32_t const fRegMask = !fNoVolatileRegs
4443 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4444 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4445
4446 /*
4447 * First check if the guest register value is already in a host register.
4448 */
4449 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4450 {
4451 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4452 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4453 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4454 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4455
4456 /* It's not supposed to be allocated... */
4457 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4458 {
4459 /*
4460 * If the register will trash the guest shadow copy, try to find a
4461 * completely unused register we can use instead. If that fails,
4462 * we need to disassociate the host reg from the guest reg.
4463 */
4464 /** @todo would be nice to know if preserving the register is in any way helpful. */
4465 /* If the purpose is calculations, try to duplicate the register value as
4466 we'll be clobbering the shadow. */
4467 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4468 && ( ~pReNative->Core.bmHstRegs
4469 & ~pReNative->Core.bmHstRegsWithGstShadow
4470 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4471 {
4472 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4473
4474 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4475
4476 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4477 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4478 g_apszIemNativeHstRegNames[idxRegNew]));
4479 idxReg = idxRegNew;
4480 }
4481 /* If the current register matches the restrictions, go ahead and allocate
4482 it for the caller. */
4483 else if (fRegMask & RT_BIT_32(idxReg))
4484 {
4485 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4486 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4487 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4488 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4489 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4490 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4491 else
4492 {
4493 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4494 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4495 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4496 }
4497 }
4498 /* Otherwise, allocate a register that satisfies the caller and transfer
4499 the shadowing if compatible with the intended use. (This basically
4500 means the call wants a non-volatile register (RSP push/pop scenario).) */
4501 else
4502 {
4503 Assert(fNoVolatileRegs);
4504 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4505 !fNoVolatileRegs
4506 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4507 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4508 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4509 {
4510 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4511 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4512 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4513 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4514 }
4515 else
4516 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4517 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4518 g_apszIemNativeHstRegNames[idxRegNew]));
4519 idxReg = idxRegNew;
4520 }
4521 }
4522 else
4523 {
4524 /*
4525 * Oops. Shadowed guest register already allocated!
4526 *
4527 * Allocate a new register, copy the value and, if updating, the
4528 * guest shadow copy assignment to the new register.
4529 */
4530 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4531 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4532 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4533 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4534
4535 /** @todo share register for readonly access. */
4536 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4537 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4538
4539 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4540 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4541
4542 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4543 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4544 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4545 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4546 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4547 else
4548 {
4549 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4550 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4551 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4552 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4553 }
4554 idxReg = idxRegNew;
4555 }
4556 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4557
4558#ifdef VBOX_STRICT
4559 /* Strict builds: Check that the value is correct. */
4560 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4561#endif
4562
4563 return idxReg;
4564 }
4565
4566 /*
4567 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
4568 */
4569 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4570
4571 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4572 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4573
4574 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4575 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4576 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4577 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4578
4579 return idxRegNew;
4580}
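
/* A minimal usage sketch (illustrative, not compiled; it mirrors the PC writeback pattern
   further down): a read-modify-write of guest RAX. kIemNativeGstReg_GprFirst and the cast
   are assumptions about the IEMNATIVEGSTREG layout; the emitters and the free call are the
   ones used elsewhere in this file. */
#if 0
static uint32_t iemNativeSketchIncGuestRax(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Get a host register holding the guest RAX value; a load is only emitted if no shadow exists. */
    uint8_t const idxTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
                                                              (IEMNATIVEGSTREG)(kIemNativeGstReg_GprFirst + X86_GREG_xAX),
                                                              kIemNativeGstRegUse_ForUpdate);
    /* Modify the value and write it back to the guest context. */
    off = iemNativeEmitAddGprImm(pReNative, off, idxTmpReg, 1);
    off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rax));
    /* Release the temporary; the shadow association is kept for later readers. */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif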
4581
4582
4583/**
4584 * Allocates a temporary host general purpose register that already holds the
4585 * given guest register value.
4586 *
4587 * The use case for this function is places where the shadowing state cannot be
4588 * modified due to branching and such. This will fail if we don't have a
4589 * current shadow copy handy or if it's incompatible. The only code that will
4590 * be emitted here is value checking code in strict builds.
4591 *
4592 * The intended use can only be readonly!
4593 *
4594 * @returns The host register number, UINT8_MAX if not present.
4595 * @param pReNative The native recompile state.
4596 * @param poff Pointer to the instruction buffer offset.
4597 * Will be updated in strict builds if a register is
4598 * found.
4599 * @param enmGstReg The guest register that is to be read.
4600 * @note In strict builds, this may throw instruction buffer growth failures.
4601 * Non-strict builds will not throw anything.
4602 * @sa iemNativeRegAllocTmpForGuestReg
4603 */
4604DECL_HIDDEN_THROW(uint8_t)
4605iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4606{
4607 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4608#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4609 AssertMsg( pReNative->idxCurCall == 0
4610 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4611 || enmGstReg == kIemNativeGstReg_Pc,
4612 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4613#endif
4614
4615 /*
4616 * First check if the guest register value is already in a host register.
4617 */
4618 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4619 {
4620 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4621 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4622 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4623 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4624
4625 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4626 {
4627 /*
4628 * We only do readonly use here, so easy compared to the other
4629 * variant of this code.
4630 */
4631 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4632 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4633 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4634 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4635 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4636
4637#ifdef VBOX_STRICT
4638 /* Strict builds: Check that the value is correct. */
4639 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4640#else
4641 RT_NOREF(poff);
4642#endif
4643 return idxReg;
4644 }
4645 }
4646
4647 return UINT8_MAX;
4648}
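
/* A minimal sketch of the intended calling pattern (illustrative, not compiled): probe for
   an existing read-only shadow copy and fall back when none is available, leaving the
   shadowing state untouched either way. */
#if 0
    uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxPcReg != UINT8_MAX)
    {
        /* ... emit code that only reads idxPcReg ... */
        iemNativeRegFreeTmp(pReNative, idxPcReg);
    }
    else
    {
        /* No shadow copy handy; take a path that does not need a host copy of the PC. */
    }
#endif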
4649
4650
4651/**
4652 * Allocates argument registers for a function call.
4653 *
4654 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4655 * need to check the return value.
4656 * @param pReNative The native recompile state.
4657 * @param off The current code buffer offset.
4658 * @param cArgs The number of arguments the function call takes.
4659 */
4660DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4661{
4662 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4663 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4664 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4665 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4666
4667 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4668 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4669 else if (cArgs == 0)
4670 return off;
4671
4672 /*
4673 * Do we get lucky and all registers are free and not shadowing anything?
4674 */
4675 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4676 for (uint32_t i = 0; i < cArgs; i++)
4677 {
4678 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4679 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4680 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4681 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4682 }
4683 /*
4684 * Okay, not lucky so we have to free up the registers.
4685 */
4686 else
4687 for (uint32_t i = 0; i < cArgs; i++)
4688 {
4689 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4690 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4691 {
4692 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4693 {
4694 case kIemNativeWhat_Var:
4695 {
4696 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4698 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4699 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4700 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4701
4702 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4703 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4704 else
4705 {
4706 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4707 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4708 }
4709 break;
4710 }
4711
4712 case kIemNativeWhat_Tmp:
4713 case kIemNativeWhat_Arg:
4714 case kIemNativeWhat_rc:
4715 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4716 default:
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4718 }
4719
4720 }
4721 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4722 {
4723 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4724 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4725 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4726 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4727 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4728 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4729 }
4730 else
4731 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4732 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4733 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4734 }
4735 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4736 return off;
4737}
4738
4739
4740DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4741
4742
4743#if 0
4744/**
4745 * Frees a register assignment of any type.
4746 *
4747 * @param pReNative The native recompile state.
4748 * @param idxHstReg The register to free.
4749 *
4750 * @note Does not update variables.
4751 */
4752DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4753{
4754 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4755 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4756 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4757 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4758 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4759 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4760 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4761 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4762 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4763 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4764 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4765 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4766 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4767 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4768
4769 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4770 /* no flushing, right:
4771 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4772 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4773 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4774 */
4775}
4776#endif
4777
4778
4779/**
4780 * Frees a temporary register.
4781 *
4782 * Any shadow copies of guest registers assigned to the host register will not
4783 * be flushed by this operation.
4784 */
4785DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4786{
4787 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4788 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4789 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4790 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4791 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4792}
4793
4794
4795/**
4796 * Frees a temporary immediate register.
4797 *
4798 * It is assumed that the caller has not modified the register, so it still holds
4799 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4800 */
4801DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4802{
4803 iemNativeRegFreeTmp(pReNative, idxHstReg);
4804}
4805
4806
4807/**
4808 * Frees a register assigned to a variable.
4809 *
4810 * The register will be disassociated from the variable.
4811 */
4812DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4813{
4814 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4815 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4816 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4817 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4818 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4819
4820 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4821 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4822 if (!fFlushShadows)
4823 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4824 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4825 else
4826 {
4827 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4828 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4829 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4830 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4831 uint64_t fGstRegShadows = fGstRegShadowsOld;
4832 while (fGstRegShadows)
4833 {
4834 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4835 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4836
4837 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4838 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4839 }
4840 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4841 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4842 }
4843}
4844
4845
4846/**
4847 * Called right before emitting a call instruction to move anything important
4848 * out of call-volatile registers, free and flush the call-volatile registers,
4849 * optionally freeing argument variables.
4850 *
4851 * @returns New code buffer offset; throws VBox status code on failure.
4852 * @param pReNative The native recompile state.
4853 * @param off The code buffer offset.
4854 * @param cArgs The number of arguments the function call takes.
4855 * It is presumed that the host register part of these has
4856 * been allocated as such already and won't need moving,
4857 * just freeing.
4858 * @param fKeepVars Mask of variables that should keep their register
4859 * assignments. Caller must take care to handle these.
4860 */
4861DECL_HIDDEN_THROW(uint32_t)
4862iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4863{
4864 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4865
4866 /* fKeepVars will reduce this mask. */
4867 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4868
4869 /*
4870 * Move anything important out of volatile registers.
4871 */
4872 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4873 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4874 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4875#ifdef IEMNATIVE_REG_FIXED_TMP0
4876 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4877#endif
4878#ifdef IEMNATIVE_REG_FIXED_TMP1
4879 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4880#endif
4881#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4882 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4883#endif
4884 & ~g_afIemNativeCallRegs[cArgs];
4885
4886 fRegsToMove &= pReNative->Core.bmHstRegs;
4887 if (!fRegsToMove)
4888 { /* likely */ }
4889 else
4890 {
4891 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4892 while (fRegsToMove != 0)
4893 {
4894 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4895 fRegsToMove &= ~RT_BIT_32(idxReg);
4896
4897 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4898 {
4899 case kIemNativeWhat_Var:
4900 {
4901 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4902 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4903 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4904 Assert(pVar->idxReg == idxReg);
4905 if (!(RT_BIT_32(idxVar) & fKeepVars))
4906 {
4907 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4908 idxVar, pVar->enmKind, pVar->idxReg));
4909 if (pVar->enmKind != kIemNativeVarKind_Stack)
4910 pVar->idxReg = UINT8_MAX;
4911 else
4912 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4913 }
4914 else
4915 fRegsToFree &= ~RT_BIT_32(idxReg);
4916 continue;
4917 }
4918
4919 case kIemNativeWhat_Arg:
4920 AssertMsgFailed(("What?!?: %u\n", idxReg));
4921 continue;
4922
4923 case kIemNativeWhat_rc:
4924 case kIemNativeWhat_Tmp:
4925 AssertMsgFailed(("Missing free: %u\n", idxReg));
4926 continue;
4927
4928 case kIemNativeWhat_FixedTmp:
4929 case kIemNativeWhat_pVCpuFixed:
4930 case kIemNativeWhat_pCtxFixed:
4931 case kIemNativeWhat_PcShadow:
4932 case kIemNativeWhat_FixedReserved:
4933 case kIemNativeWhat_Invalid:
4934 case kIemNativeWhat_End:
4935 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4936 }
4937 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4938 }
4939 }
4940
4941 /*
4942 * Do the actual freeing.
4943 */
4944 if (pReNative->Core.bmHstRegs & fRegsToFree)
4945 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4946 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4947 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4948
4949 /* If there are guest register shadows in any call-volatile register, we
4950 have to clear the corresponding guest register masks for each register. */
4951 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4952 if (fHstRegsWithGstShadow)
4953 {
4954 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4955 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4956 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4957 do
4958 {
4959 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4960 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4961
4962 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4963 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4964 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4965 } while (fHstRegsWithGstShadow != 0);
4966 }
4967
4968 return off;
4969}
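
/* Sketch of the surrounding sequence this helper is written for (illustrative, not compiled;
   argument loading and the actual call emission are omitted): */
#if 0
    off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2 /*cArgs*/);
    /* ... load the argument registers (IEMNATIVE_CALL_ARG0_GREG, ...) and emit the call ... */
#endif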
4970
4971
4972/**
4973 * Flushes a set of guest register shadow copies.
4974 *
4975 * This is usually done after calling a threaded function or a C-implementation
4976 * of an instruction.
4977 *
4978 * @param pReNative The native recompile state.
4979 * @param fGstRegs Set of guest registers to flush.
4980 */
4981DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4982{
4983 /*
4984 * Reduce the mask by what's currently shadowed
4985 */
4986 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4987 fGstRegs &= bmGstRegShadowsOld;
4988 if (fGstRegs)
4989 {
4990 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4991 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4992 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4993 if (bmGstRegShadowsNew)
4994 {
4995 /*
4996 * Partial.
4997 */
4998 do
4999 {
5000 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5001 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5002 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5003 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5004 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5005
5006 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5007 fGstRegs &= ~fInThisHstReg;
5008 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5010 if (!fGstRegShadowsNew)
5011 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5012 } while (fGstRegs != 0);
5013 }
5014 else
5015 {
5016 /*
5017 * Clear all.
5018 */
5019 do
5020 {
5021 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5022 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5023 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5024 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5025 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5026
5027 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5028 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5029 } while (fGstRegs != 0);
5030 pReNative->Core.bmHstRegsWithGstShadow = 0;
5031 }
5032 }
5033}
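
/* Sketch (illustrative, not compiled): after a helper that may have changed guest EFLAGS,
   the now stale shadow copy is dropped so the next reader reloads it from CPUMCTX. */
#if 0
    iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
#endif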
5034
5035
5036/**
5037 * Flushes guest register shadow copies held by a set of host registers.
5038 *
5039 * This is used with the TLB lookup code for ensuring that we don't carry on
5040 * with any guest shadows in volatile registers, as these will get corrupted by
5041 * a TLB miss.
5042 *
5043 * @param pReNative The native recompile state.
5044 * @param fHstRegs Set of host registers to flush guest shadows for.
5045 */
5046DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5047{
5048 /*
5049 * Reduce the mask by what's currently shadowed.
5050 */
5051 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5052 fHstRegs &= bmHstRegsWithGstShadowOld;
5053 if (fHstRegs)
5054 {
5055 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5056 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5057 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5058 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5059 if (bmHstRegsWithGstShadowNew)
5060 {
5061 /*
5062 * Partial (likely).
5063 */
5064 uint64_t fGstShadows = 0;
5065 do
5066 {
5067 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5068 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5069 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5070 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5071
5072 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5073 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5074 fHstRegs &= ~RT_BIT_32(idxHstReg);
5075 } while (fHstRegs != 0);
5076 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5077 }
5078 else
5079 {
5080 /*
5081 * Clear all.
5082 */
5083 do
5084 {
5085 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5086 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5087 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5088 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5089
5090 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5091 fHstRegs &= ~RT_BIT_32(idxHstReg);
5092 } while (fHstRegs != 0);
5093 pReNative->Core.bmGstRegShadows = 0;
5094 }
5095 }
5096}
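
/* Sketch matching the TLB lookup use case described above (illustrative, not compiled):
   drop guest shadows from all call-volatile host registers ahead of a possible TLB miss
   helper call. */
#if 0
    iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
#endif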
5097
5098
5099/**
5100 * Restores guest shadow copies in volatile registers.
5101 *
5102 * This is used after calling a helper function (think TLB miss) to restore the
5103 * register state of volatile registers.
5104 *
5105 * @param pReNative The native recompile state.
5106 * @param off The code buffer offset.
5107 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5108 * be active (allocated) w/o asserting. Hack.
5109 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5110 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5111 */
5112DECL_HIDDEN_THROW(uint32_t)
5113iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5114{
5115 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5116 if (fHstRegs)
5117 {
5118 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5119 do
5120 {
5121 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5122
5123 /* It's not fatal if a register is active holding a variable that
5124 is shadowing a guest register, ASSUMING all pending guest register
5125 writes were flushed prior to the helper call. However, we'll be
5126 emitting duplicate restores, so it wastes code space. */
5127 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5128 RT_NOREF(fHstRegsActiveShadows);
5129
5130 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5131 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5132 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5133 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5134
5135 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5136 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5137
5138 fHstRegs &= ~RT_BIT_32(idxHstReg);
5139 } while (fHstRegs != 0);
5140 }
5141 return off;
5142}
5143
5144
5145#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5146# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5147static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5148{
5149 /* Compare the shadow with the context value, they should match. */
5150 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5151 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5152 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5153 return off;
5154}
5155# endif
5156
5157/**
5158 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5159 */
5160static uint32_t
5161iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5162{
5163 if (pReNative->Core.offPc)
5164 {
5165# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5166 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5167 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5168# endif
5169
5170# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5171 /* Allocate a temporary PC register. */
5172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5173
5174 /* Perform the addition and store the result. */
5175 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5176 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5177
5178 /* Free but don't flush the PC register. */
5179 iemNativeRegFreeTmp(pReNative, idxPcReg);
5180# else
5181 /* Compare the shadow with the context value, they should match. */
5182 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5183 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5184# endif
5185
5186 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5187 pReNative->Core.offPc = 0;
5188 pReNative->Core.cInstrPcUpdateSkipped = 0;
5189 }
5190# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5191 else
5192 {
5193 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5194 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5195 }
5196# endif
5197
5198 return off;
5199}
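
/* Sketch (illustrative, not compiled): code paths that need cpum.GstCtx.rip to be current,
   e.g. before exiting the TB or calling a helper that inspects it, emit the writeback first
   so all skipped per-instruction RIP additions are folded into a single store. */
# if 0
    off = iemNativeEmitPcWriteback(pReNative, off);
# endif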
5200#endif
5201
5202
5203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5204
5205
5206/*********************************************************************************************************************************
5207* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5208*********************************************************************************************************************************/
5209
5210/**
5211 * Info about shadowed guest SIMD register values.
5212 * @see IEMNATIVEGSTSIMDREG
5213 */
5214static struct
5215{
5216 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5217 uint32_t offXmm;
5218 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5219 uint32_t offYmm;
5220 /** Name (for logging). */
5221 const char *pszName;
5222} const g_aGstSimdShadowInfo[] =
5223{
5224#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5225 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5226 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5227 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5228 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5229 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5230 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5231 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5232 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5233 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5234 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5235 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5236 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5237 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5238 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5239 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5240 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5241 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5242#undef CPUMCTX_OFF_AND_SIZE
5243};
5244AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5245
5246
5247#ifdef LOG_ENABLED
5248/** Host CPU SIMD register names. */
5249DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5250{
5251#ifdef RT_ARCH_AMD64
5252 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5253#elif defined(RT_ARCH_ARM64)
5254 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5255 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5256#else
5257# error "port me"
5258#endif
5259};
5260#endif
5261
5262
5263DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
5264 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
5265{
5266 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5267
5268 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = enmWhat;
5269 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5270 RT_NOREF(idxVar);
5271 return idxSimdReg;
5272}
5273
5274
5275/**
5276 * Frees a temporary SIMD register.
5277 *
5278 * Any shadow copies of guest registers assigned to the host register will not
5279 * be flushed by this operation.
5280 */
5281DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5282{
5283 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5284 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5285 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5286 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5287 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5288}
5289
5290
5291/**
5292 * Locate a SIMD register, possibly freeing one up.
5293 *
5294 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5295 * failed.
5296 *
5297 * @returns Host register number on success. Returns UINT8_MAX if no registers
5298 * were found; the caller is supposed to deal with this and raise an
5299 * allocation type specific status code (if desired).
5300 *
5301 * @throws VBox status code if we run into trouble spilling a variable or
5302 * recording debug info. Does NOT throw anything if we're out of
5303 * registers, though.
5304 */
5305static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5306 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5307{
5308 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5309 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5310 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5311
5312 AssertFailed();
5313
5314 /*
5315 * Try a freed register that's shadowing a guest register.
5316 */
5317 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5318 if (fRegs)
5319 {
5320 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5321
5322#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5323 /*
5324 * When we have liveness information, we use it to kick out all shadowed
5325 * guest registers that will not be needed any more in this TB. If we're
5326 * lucky, this may prevent us from ending up here again.
5327 *
5328 * Note! We must consider the previous entry here so we don't free
5329 * anything that the current threaded function requires (current
5330 * entry is produced by the next threaded function).
5331 */
5332 uint32_t const idxCurCall = pReNative->idxCurCall;
5333 if (idxCurCall > 0)
5334 {
5335 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5336
5337# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5338 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5339 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5340 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5341#else
5342 /* Construct a mask of the registers not in the read or write state.
5343 Note! We could skip writes, if they aren't from us, as this is just
5344 a hack to prevent trashing registers that have just been written
5345 or will be written when we retire the current instruction. */
5346 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5347 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5348 & IEMLIVENESSBIT_MASK;
5349#endif
5350 /* Merge EFLAGS. */
5351 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
5352 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
5353 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
5354 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
5355 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
5356
5357 /* If it matches any shadowed registers. */
5358 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5359 {
5360 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5361 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5362 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5363
5364 /* See if we've got any unshadowed registers we can return now. */
5365 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5366 if (fUnshadowedRegs)
5367 {
5368 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5369 return (fPreferVolatile
5370 ? ASMBitFirstSetU32(fUnshadowedRegs)
5371 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5372 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5373 - 1;
5374 }
5375 }
5376 }
5377#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5378
5379 unsigned const idxReg = (fPreferVolatile
5380 ? ASMBitFirstSetU32(fRegs)
5381 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5382 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5383 - 1;
5384
5385 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5386 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5387 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5388 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5389 Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5390
5391 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5392 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5393 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5394 return idxReg;
5395 }
5396
5397 /*
5398 * Try free up a variable that's in a register.
5399 *
5400 * We do two rounds here, first evacuating variables that don't need to be
5401 * saved on the stack, then in the second round moving things to the stack.
5402 */
5403 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5404 AssertReleaseFailed(); /** @todo */
5405#if 0
5406 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5407 {
5408 uint32_t fVars = pReNative->Core.bmSimdVars;
5409 while (fVars)
5410 {
5411 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5412 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5413 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5414 && (RT_BIT_32(idxReg) & fRegMask)
5415 && ( iLoop == 0
5416 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5417 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5418 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5419 {
5420 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5421 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5422 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5423 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5424 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5425 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5426
5427 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5428 {
5429 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5430 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5431 }
5432
5433 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5434 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5435
5436 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5437 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5438 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5439 return idxReg;
5440 }
5441 fVars &= ~RT_BIT_32(idxVar);
5442 }
5443 }
5444#else
5445 RT_NOREF(poff);
5446#endif
5447
5448 return UINT8_MAX;
5449}
5450
5451
5452/**
5453 * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
5454 * SIMD register @a enmGstSimdReg.
5455 *
5456 * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any
5457 * host register before calling.
5458 */
5459DECL_FORCE_INLINE(void)
5460iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5461{
5462 Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
5463 Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5464 Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
5465
5466 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxHstSimdReg;
5467 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5468 pReNative->Core.bmGstSimdRegShadows |= RT_BIT_64(enmGstSimdReg);
5469 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxHstSimdReg);
5470#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5471 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5472 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
5473#else
5474 RT_NOREF(off);
5475#endif
5476}
5477
5478
5479/**
5480 * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
5481 * to @a idxSimdRegTo.
5482 */
5483DECL_FORCE_INLINE(void)
5484iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
5485 IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5486{
5487 Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5488 Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
5489 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
5490 == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
5491 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5492 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
5493 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
5494 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
5495 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
5496 Assert( pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
5497 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
5498
5499
5500 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
5501 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows = fGstRegShadowsFrom;
5502 if (!fGstRegShadowsFrom)
5503 {
5504 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdRegFrom);
5505 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5506 }
5507 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxSimdRegTo);
5508 pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5509 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxSimdRegTo;
5510#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5511 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5512 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
5513#else
5514 RT_NOREF(off);
5515#endif
5516}
5517
5518
5519/**
5520 * Clear any guest register shadow claims from @a idxHstSimdReg.
5521 *
5522 * The register does not need to be shadowing any guest registers.
5523 */
5524DECL_FORCE_INLINE(void)
5525iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
5526{
5527 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5528 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
5529 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5530 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
5531 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5532 Assert( !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
5533 && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
5534
5535#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5536 uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5537 if (fGstRegs)
5538 {
5539 Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
5540 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5541 while (fGstRegs)
5542 {
5543 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5544 fGstRegs &= ~RT_BIT_64(iGstReg);
5545 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
5546 }
5547 }
5548#else
5549 RT_NOREF(off);
5550#endif
5551
5552 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstSimdReg);
5553 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5554 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5555 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5556}
5557
5558
5559/**
5560 * Flushes a set of guest register shadow copies.
5561 *
5562 * This is usually done after calling a threaded function or a C-implementation
5563 * of an instruction.
5564 *
5565 * @param pReNative The native recompile state.
5566 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5567 */
5568DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5569{
5570 /*
5571 * Reduce the mask by what's currently shadowed
5572 */
5573 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5574 fGstSimdRegs &= bmGstSimdRegShadows;
5575 if (fGstSimdRegs)
5576 {
5577 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5578 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5579 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5580 if (bmGstSimdRegShadowsNew)
5581 {
5582 /*
5583 * Partial.
5584 */
5585 do
5586 {
5587 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5588 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5589 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5590 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5591 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5592 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5593
5594 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5595 fGstSimdRegs &= ~fInThisHstReg;
5596 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5597 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5598 if (!fGstRegShadowsNew)
5599 {
5600 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5601 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5602 }
5603 } while (fGstSimdRegs != 0);
5604 }
5605 else
5606 {
5607 /*
5608 * Clear all.
5609 */
5610 do
5611 {
5612 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5613 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5614 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5615 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5616 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5617 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5618
5619 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5620 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5621 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5622 } while (fGstSimdRegs != 0);
5623 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5624 }
5625 }
5626}
5627
5628
5629/**
5630 * Allocates a temporary host SIMD register.
5631 *
5632 * This may emit code to save register content onto the stack in order to free
5633 * up a register.
5634 *
5635 * @returns The host register number; throws VBox status code on failure,
5636 * so no need to check the return value.
5637 * @param pReNative The native recompile state.
5638 * @param poff Pointer to the variable with the code buffer position.
5639 * This will be update if we need to move a variable from
5640 * register to stack in order to satisfy the request.
5641 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5642 * registers (@c true, default) or the other way around
5643 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5644 */
5645DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5646{
5647 /*
5648 * Try find a completely unused register, preferably a call-volatile one.
5649 */
5650 uint8_t idxSimdReg;
5651 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5652 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5653 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5654 if (fRegs)
5655 {
5656 if (fPreferVolatile)
5657 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5658 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5659 else
5660 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5661 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5662 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5663 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5664 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5665 }
5666 else
5667 {
5668 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5669 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5670 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5671 }
5672
5673 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5674 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5675}
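/*
 * Minimal usage sketch for the allocator above (illustrative only, not part of
 * the build; the "emit something" step and the freeing step are placeholders
 * for whatever the caller actually does with the register).
 */
#if 0
{
    uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off); /* throws on failure, no check needed */
    /* ... emit instructions using idxSimdRegTmp as scratch ... */
    /* ... release it again via the matching SIMD register free routine when done ... */
}
#endif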
5676
5677
5678/**
5679 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5680 * registers.
5681 *
5682 * @returns The host register number; throws VBox status code on failure,
5683 * so no need to check the return value.
5684 * @param pReNative The native recompile state.
5685 * @param poff Pointer to the variable with the code buffer position.
5686 * This will be updated if we need to move a variable from
5687 * register to stack in order to satisfy the request.
5688 * @param fRegMask Mask of acceptable registers.
5689 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5690 * registers (@c true, default) or the other way around
5691 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5692 */
5693DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5694 bool fPreferVolatile /*= true*/)
5695{
5696 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5697 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5698
5699 /*
5700 * Try to find a completely unused register, preferably a call-volatile one.
5701 */
5702 uint8_t idxSimdReg;
5703 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5704 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5705 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5706 & fRegMask;
5707 if (fRegs)
5708 {
5709 if (fPreferVolatile)
5710 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5711 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5712 else
5713 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5714 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5715 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5716 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5717 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5718 }
5719 else
5720 {
5721 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5722 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5723 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5724 }
5725
5726 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5727 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5728}
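/*
 * Sketch: restricting the allocation to non-volatile registers by building the
 * same kind of mask iemNativeSimdRegAllocTmpForGuestSimdReg uses further down
 * (illustrative only, not compiled).
 */
#if 0
{
    uint32_t const fRegMaskNonVolatile = IEMNATIVE_HST_SIMD_REG_MASK
                                       & ~IEMNATIVE_SIMD_REG_FIXED_MASK
                                       & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
    uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmpEx(pReNative, &off, fRegMaskNonVolatile,
                                                             false /*fPreferVolatile*/);
    /* ... */
}
#endif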
5729
5730
5731static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5732 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5733{
5734 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5735 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == enmLoadSzDst
5736 || pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5737 {
5738# ifdef RT_ARCH_ARM64
5739 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5740 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5741# endif
5742
5743 switch (enmLoadSzDst)
5744 {
5745 case kIemNativeGstSimdRegLdStSz_256:
5746 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5747 break;
5748 case kIemNativeGstSimdRegLdStSz_Low128:
5749 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5750 break;
5751 case kIemNativeGstSimdRegLdStSz_High128:
5752 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5753 break;
5754 default:
5755 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5756 }
5757
5758 pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded = enmLoadSzDst;
5759 return off;
5760 }
5761 else
5762 {
5763 /* Complicated stuff where the source is currently missing something, later. */
5764 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5765 }
5766
5767 return off;
5768}
5769
5770
5771/**
5772 * Allocates a temporary host SIMD register for keeping a guest
5773 * SIMD register value.
5774 *
5775 * Since we may already have a register holding the guest register value,
5776 * code will be emitted to do the loading if that's not the case. Code may also
5777 * be emitted if we have to free up a register to satisfy the request.
5778 *
5779 * @returns The host register number; throws VBox status code on failure, so no
5780 * need to check the return value.
5781 * @param pReNative The native recompile state.
5782 * @param poff Pointer to the variable with the code buffer
5783 * position. This will be updated if we need to move a
5784 * variable from register to stack in order to satisfy
5785 * the request.
5786 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5787 * @param enmIntendedUse How the caller will be using the host register.
5788 * @param fNoVolatileRegs Set if no volatile register is allowed, clear if any
5789 * register is okay (default). The ASSUMPTION here is
5790 * that the caller has already flushed all volatile
5791 * registers, so this is only applied if we allocate a
5792 * new register.
5793 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5794 */
5795DECL_HIDDEN_THROW(uint8_t)
5796iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5797 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5798 bool fNoVolatileRegs /*= false*/)
5799{
5800 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5801#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5802 AssertMsg( pReNative->idxCurCall == 0
5803 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5804 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5805 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5806 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5807 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5808 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5809#endif
5810#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5811 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5812#endif
5813 uint32_t const fRegMask = !fNoVolatileRegs
5814 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5815 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5816
5817 /*
5818 * First check if the guest register value is already in a host register.
5819 */
5820 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5821 {
5822 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5823 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5824 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5825 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5826
5827 /* It's not supposed to be allocated... */
5828 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5829 {
5830 /*
5831 * If the register will trash the guest shadow copy, try to find a
5832 * completely unused register we can use instead. If that fails,
5833 * we need to disassociate the host reg from the guest reg.
5834 */
5835 /** @todo would be nice to know if preserving the register is in any way helpful. */
5836 /* If the purpose is calculations, try to duplicate the register value as
5837 we'll be clobbering the shadow. */
5838 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5839 && ( ~pReNative->Core.bmHstSimdRegs
5840 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5841 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5842 {
5843 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5844
5845 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5846
5847 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5848 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5849 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5850 idxSimdReg = idxRegNew;
5851 }
5852 /* If the current register matches the restrictions, go ahead and allocate
5853 it for the caller. */
5854 else if (fRegMask & RT_BIT_32(idxSimdReg))
5855 {
5856 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5857 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5858 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5859 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5860 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5861 else
5862 {
5863 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5864 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5865 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5866 }
5867 }
5868 /* Otherwise, allocate a register that satisfies the caller and transfer
5869 the shadowing if compatible with the intended use. (This basically
5870 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5871 else
5872 {
5873 Assert(fNoVolatileRegs);
5874 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5875 !fNoVolatileRegs
5876 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5877 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5878 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5879 {
5880 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5881 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5882 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5883 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5884 }
5885 else
5886 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5887 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5888 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5889 idxSimdReg = idxRegNew;
5890 }
5891 }
5892 else
5893 {
5894 /*
5895 * Oops. Shadowed guest register already allocated!
5896 *
5897 * Allocate a new register, copy the value and, if updating, the
5898 * guest shadow copy assignment to the new register.
5899 */
5900 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5901 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5902 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5903 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5904
5905 /** @todo share register for readonly access. */
5906 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5907 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5908
5909 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5910 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5911 else
5912 {
5913 /** @todo It is a bit unsafe to mark the register as already loaded even though nothing has been written to it yet. */
5914 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5915 }
5916
5917 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5918 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5919 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5920 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5921 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5922 else
5923 {
5924 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5925 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5926 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5927 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5928 }
5929 idxSimdReg = idxRegNew;
5930 }
5931 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5932
5933#ifdef VBOX_STRICT
5934 /* Strict builds: Check that the value is correct. */
5935 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5936#endif
5937
5938 return idxSimdReg;
5939 }
5940
5941 /*
5942 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5943 */
5944 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5945
5946 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5947 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5948 else
5949 {
5950 /** @todo It is a bit unsafe to mark the register as already loaded even though nothing has been written to it yet. */
5951 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5952 }
5953
5954 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5955 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5956
5957 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5958 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5959
5960 return idxRegNew;
5961}
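/*
 * Sketch of a typical call site (illustrative only, not compiled): fetching the
 * low 128 bits of guest XMM0 for read-only use.  The register index 0 is an
 * arbitrary example value.
 */
#if 0
{
    uint8_t const idxSimdRegXmm0 = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
                                                                           IEMNATIVEGSTSIMDREG_SIMD(0),
                                                                           kIemNativeGstSimdRegLdStSz_Low128,
                                                                           kIemNativeGstRegUse_ReadOnly);
    /* ... emit code reading from idxSimdRegXmm0 ... */
}
#endif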
5962
5963
5964/**
5965 * Emits code to flush a pending write of the given SIMD register, if any, and flushes the guest to host SIMD register association.
5966 *
5967 * @returns New code buffer offset.
5968 * @param pReNative The native recompile state.
5969 * @param off Current code buffer position.
5970 * @param idxGstSimdReg The guest SIMD register to flush.
5971 */
5972static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGstSimdReg)
5973{
5974 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
5975
5976 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5977 g_aGstSimdShadowInfo[idxGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5978 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg),
5979 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg)));
5980
5981 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, idxGstSimdReg))
5982 {
5983 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5984 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5985 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offXmm);
5986 }
5987
5988 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, idxGstSimdReg))
5989 {
5990 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5991 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5992 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[idxGstSimdReg].offYmm);
5993 }
5994
5995 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, idxGstSimdReg);
5996 return off;
5997}
5998
5999#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6000
6001
6002
6003/*********************************************************************************************************************************
6004* Code emitters for flushing pending guest register writes and sanity checks *
6005*********************************************************************************************************************************/
6006
6007/**
6008 * Flushes delayed write of a specific guest register.
6009 *
6010 * This must be called prior to calling CImpl functions and any helpers that use
6011 * the guest state (like raising exceptions) and such.
6012 *
6013 * This optimization has not yet been implemented. The first target would be
6014 * RIP updates, since these are the most common ones.
6015 */
6016DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6017 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
6018{
6019#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6020 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
6021#endif
6022
6023#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6024 if ( enmClass == kIemNativeGstRegRef_XReg
6025 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
6026 {
6027 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxReg);
6028 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
6029 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
6030
6031 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6032 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
6033 }
6034#endif
6035 RT_NOREF(pReNative, enmClass, idxReg);
6036 return off;
6037}
6038
6039
6040/**
6041 * Flushes any delayed guest register writes.
6042 *
6043 * This must be called prior to calling CImpl functions and any helpers that use
6044 * the guest state (like raising exceptions) and such.
6045 *
6046 * This optimization has not yet been implemented. The first target would be
6047 * RIP updates, since these are the most common ones.
6048 */
6049DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/,
6050 bool fFlushShadows /*= true*/)
6051{
6052#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6053 if (!(fGstShwExcept & kIemNativeGstReg_Pc))
6054 off = iemNativeEmitPcWriteback(pReNative, off);
6055#else
6056 RT_NOREF(pReNative, fGstShwExcept);
6057#endif
6058
6059#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6060 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6061 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6062 {
6063 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6064 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6065
6066 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6067 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, idxGstSimdReg);
6068
6069 if ( fFlushShadows
6070 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6071 {
6072 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6073
6074 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6075 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6076 }
6077 }
6078#else
6079 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6080#endif
6081
6082 return off;
6083}
6084
6085
6086#ifdef VBOX_STRICT
6087/**
6088 * Does internal register allocator sanity checks.
6089 */
6090static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6091{
6092 /*
6093 * Iterate host registers building a guest shadowing set.
6094 */
6095 uint64_t bmGstRegShadows = 0;
6096 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6097 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6098 while (bmHstRegsWithGstShadow)
6099 {
6100 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6101 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6102 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6103
6104 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6105 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6106 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6107 bmGstRegShadows |= fThisGstRegShadows;
6108 while (fThisGstRegShadows)
6109 {
6110 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6111 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6112 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6113 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6114 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6115 }
6116 }
6117 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6118 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6119 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6120
6121 /*
6122 * Now the other way around, checking the guest to host index array.
6123 */
6124 bmHstRegsWithGstShadow = 0;
6125 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6126 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6127 while (bmGstRegShadows)
6128 {
6129 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6130 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6131 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6132
6133 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6134 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6135 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6136 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6137 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6138 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6139 }
6140 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6141 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6142 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6143}
6144#endif
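/*
 * The sanity check above (and most loops in this file) use the same 1-based
 * bit-scan idiom; spelled out once for reference (illustrative sketch, not
 * compiled; fSomeBitmap is a hypothetical input):
 */
#if 0
{
    uint64_t fBitmap = fSomeBitmap;
    while (fBitmap)
    {
        unsigned const idxBit = ASMBitFirstSetU64(fBitmap) - 1; /* returns 1-based index, 0 when no bit is set */
        fBitmap &= ~RT_BIT_64(idxBit);
        /* ... process idxBit ... */
    }
}
#endif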
6145
6146
6147/*********************************************************************************************************************************
6148* Code Emitters (larger snippets) *
6149*********************************************************************************************************************************/
6150
6151/**
6152 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6153 * extending to 64-bit width.
6154 *
6155 * @returns New code buffer offset on success, UINT32_MAX on failure.
6156 * @param pReNative The native recompile state.
6157 * @param off The current code buffer position.
6158 * @param idxHstReg The host register to load the guest register value into.
6159 * @param enmGstReg The guest register to load.
6160 *
6161 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
6162 * that is something the caller needs to do if applicable.
6163 */
6164DECL_HIDDEN_THROW(uint32_t)
6165iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6166{
6167 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
6168 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6169
6170 switch (g_aGstShadowInfo[enmGstReg].cb)
6171 {
6172 case sizeof(uint64_t):
6173 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6174 case sizeof(uint32_t):
6175 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6176 case sizeof(uint16_t):
6177 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6178#if 0 /* not present in the table. */
6179 case sizeof(uint8_t):
6180 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6181#endif
6182 default:
6183 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6184 }
6185}
6186
6187
6188#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6189/**
6190 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6191 *
6192 * @returns New code buffer offset on success, UINT32_MAX on failure.
6193 * @param pReNative The recompiler state.
6194 * @param off The current code buffer position.
6195 * @param idxHstSimdReg The host register to load the guest register value into.
6196 * @param enmGstSimdReg The guest register to load.
6197 * @param enmLoadSz The load size of the register.
6198 *
6199 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg;
6200 * that is something the caller needs to do if applicable.
6201 */
6202DECL_HIDDEN_THROW(uint32_t)
6203iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6204 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6205{
6206 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6207
6208 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
6209 switch (enmLoadSz)
6210 {
6211 case kIemNativeGstSimdRegLdStSz_256:
6212 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6213 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6214 case kIemNativeGstSimdRegLdStSz_Low128:
6215 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6216 case kIemNativeGstSimdRegLdStSz_High128:
6217 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6218 default:
6219 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6220 }
6221}
6222#endif
6223
6224#ifdef VBOX_STRICT
6225/**
6226 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6227 *
6228 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6229 * Trashes EFLAGS on AMD64.
6230 */
6231static uint32_t
6232iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6233{
6234# ifdef RT_ARCH_AMD64
6235 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6236
6237 /* rol reg64, 32 */
6238 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6239 pbCodeBuf[off++] = 0xc1;
6240 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6241 pbCodeBuf[off++] = 32;
6242
6243 /* test reg32, ffffffffh */
6244 if (idxReg >= 8)
6245 pbCodeBuf[off++] = X86_OP_REX_B;
6246 pbCodeBuf[off++] = 0xf7;
6247 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6248 pbCodeBuf[off++] = 0xff;
6249 pbCodeBuf[off++] = 0xff;
6250 pbCodeBuf[off++] = 0xff;
6251 pbCodeBuf[off++] = 0xff;
6252
6253 /* je/jz +1 */
6254 pbCodeBuf[off++] = 0x74;
6255 pbCodeBuf[off++] = 0x01;
6256
6257 /* int3 */
6258 pbCodeBuf[off++] = 0xcc;
6259
6260 /* rol reg64, 32 */
6261 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6262 pbCodeBuf[off++] = 0xc1;
6263 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6264 pbCodeBuf[off++] = 32;
6265
6266# elif defined(RT_ARCH_ARM64)
6267 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6268 /* lsr tmp0, reg64, #32 */
6269 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6270 /* cbz tmp0, +1 */
6271 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6272 /* brk #0x1100 */
6273 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6274
6275# else
6276# error "Port me!"
6277# endif
6278 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6279 return off;
6280}
6281#endif /* VBOX_STRICT */
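/*
 * Note on the AMD64 sequence above: the first ROL by 32 swaps the register
 * halves so the 32-bit TEST can inspect the original top half, and the second
 * ROL restores the value.  The invariant being verified is simply (sketch;
 * uRegValue is a hypothetical name for the content of idxReg):
 */
#if 0
Assert(uRegValue <= UINT32_MAX);
#endif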
6282
6283
6284#ifdef VBOX_STRICT
6285/**
6286 * Emitting code that checks that the content of register @a idxReg is the same
6287 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6288 * instruction if that's not the case.
6289 *
6290 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6291 * Trashes EFLAGS on AMD64.
6292 */
6293static uint32_t
6294iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6295{
6296# ifdef RT_ARCH_AMD64
6297 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6298
6299 /* cmp reg, [mem] */
6300 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6301 {
6302 if (idxReg >= 8)
6303 pbCodeBuf[off++] = X86_OP_REX_R;
6304 pbCodeBuf[off++] = 0x38;
6305 }
6306 else
6307 {
6308 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6309 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6310 else
6311 {
6312 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6313 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6314 else
6315 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6316 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6317 if (idxReg >= 8)
6318 pbCodeBuf[off++] = X86_OP_REX_R;
6319 }
6320 pbCodeBuf[off++] = 0x39;
6321 }
6322 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6323
6324 /* je/jz +1 */
6325 pbCodeBuf[off++] = 0x74;
6326 pbCodeBuf[off++] = 0x01;
6327
6328 /* int3 */
6329 pbCodeBuf[off++] = 0xcc;
6330
6331 /* For values smaller than the register size, we must check that the rest
6332 of the register is all zeros. */
6333 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6334 {
6335 /* test reg64, imm32 */
6336 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6337 pbCodeBuf[off++] = 0xf7;
6338 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6339 pbCodeBuf[off++] = 0;
6340 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6341 pbCodeBuf[off++] = 0xff;
6342 pbCodeBuf[off++] = 0xff;
6343
6344 /* je/jz +1 */
6345 pbCodeBuf[off++] = 0x74;
6346 pbCodeBuf[off++] = 0x01;
6347
6348 /* int3 */
6349 pbCodeBuf[off++] = 0xcc;
6350 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6351 }
6352 else
6353 {
6354 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6355 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6356 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6357 }
6358
6359# elif defined(RT_ARCH_ARM64)
6360 /* mov TMP0, [gstreg] */
6361 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6362
6363 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6364 /* sub tmp0, tmp0, idxReg */
6365 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6366 /* cbz tmp0, +1 */
6367 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6368 /* brk #0x1000+enmGstReg */
6369 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6370 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6371
6372# else
6373# error "Port me!"
6374# endif
6375 return off;
6376}
6377
6378
6379# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6380/**
6381 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6382 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6383 * instruction if that's not the case.
6384 *
6385 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6386 * Trashes EFLAGS on AMD64.
6387 */
6388static uint32_t
6389iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6390 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6391{
6392# ifdef RT_ARCH_AMD64
6393 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
6394
6395 /* movdqa vectmp0, idxSimdReg */
6396 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6397
6398 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6399
6400 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6401 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6402 if (idxSimdReg >= 8)
6403 pbCodeBuf[off++] = X86_OP_REX_R;
6404 pbCodeBuf[off++] = 0x0f;
6405 pbCodeBuf[off++] = 0x38;
6406 pbCodeBuf[off++] = 0x29;
6407 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6408
6409 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6410 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6411 pbCodeBuf[off++] = X86_OP_REX_W
6412 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6413 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6414 pbCodeBuf[off++] = 0x0f;
6415 pbCodeBuf[off++] = 0x3a;
6416 pbCodeBuf[off++] = 0x16;
6417 pbCodeBuf[off++] = 0xeb;
6418 pbCodeBuf[off++] = 0x00;
6419
6420 /* test tmp0, 0xffffffff. */
6421 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6422 pbCodeBuf[off++] = 0xf7;
6423 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6424 pbCodeBuf[off++] = 0xff;
6425 pbCodeBuf[off++] = 0xff;
6426 pbCodeBuf[off++] = 0xff;
6427 pbCodeBuf[off++] = 0xff;
6428
6429 /* je/jz +1 */
6430 pbCodeBuf[off++] = 0x74;
6431 pbCodeBuf[off++] = 0x01;
6432
6433 /* int3 */
6434 pbCodeBuf[off++] = 0xcc;
6435
6436 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6437 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6438 pbCodeBuf[off++] = X86_OP_REX_W
6439 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6440 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6441 pbCodeBuf[off++] = 0x0f;
6442 pbCodeBuf[off++] = 0x3a;
6443 pbCodeBuf[off++] = 0x16;
6444 pbCodeBuf[off++] = 0xeb;
6445 pbCodeBuf[off++] = 0x01;
6446
6447 /* test tmp0, 0xffffffff. */
6448 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6449 pbCodeBuf[off++] = 0xf7;
6450 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6451 pbCodeBuf[off++] = 0xff;
6452 pbCodeBuf[off++] = 0xff;
6453 pbCodeBuf[off++] = 0xff;
6454 pbCodeBuf[off++] = 0xff;
6455
6456 /* je/jz +1 */
6457 pbCodeBuf[off++] = 0x74;
6458 pbCodeBuf[off++] = 0x01;
6459
6460 /* int3 */
6461 pbCodeBuf[off++] = 0xcc;
6462
6463# elif defined(RT_ARCH_ARM64)
6464 /* mov vectmp0, [gstreg] */
6465 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6466
6467 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6468 {
6469 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6470 /* eor vectmp0, vectmp0, idxSimdReg */
6471 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6472 /* cnt vectmp0, vectmp0, #0*/
6473 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6474 /* umov tmp0, vectmp0.D[0] */
6475 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6476 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6477 /* cbz tmp0, +1 */
6478 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6479 /* brk #0x1000+enmGstReg */
6480 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6481 }
6482
6483 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6484 {
6485 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6486 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6487 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6488 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6489 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6490 /* umov tmp0, (vectmp0 + 1).D[0] */
6491 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6492 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6493 /* cbz tmp0, +1 */
6494 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6495 /* brk #0x1000+enmGstReg */
6496 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6497 }
6498
6499# else
6500# error "Port me!"
6501# endif
6502
6503 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6504 return off;
6505}
6506# endif
6507#endif /* VBOX_STRICT */
6508
6509
6510#ifdef VBOX_STRICT
6511/**
6512 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6513 * important bits.
6514 *
6515 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6516 * Trashes EFLAGS on AMD64.
6517 */
6518static uint32_t
6519iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6520{
6521 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6522 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6523 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6524 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6525
6526#ifdef RT_ARCH_AMD64
6527 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6528
6529 /* je/jz +1 */
6530 pbCodeBuf[off++] = 0x74;
6531 pbCodeBuf[off++] = 0x01;
6532
6533 /* int3 */
6534 pbCodeBuf[off++] = 0xcc;
6535
6536# elif defined(RT_ARCH_ARM64)
6537 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6538
6539 /* b.eq +1 */
6540 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6541 /* brk #0x2000 */
6542 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6543
6544# else
6545# error "Port me!"
6546# endif
6547 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6548
6549 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6550 return off;
6551}
6552#endif /* VBOX_STRICT */
6553
6554
6555/**
6556 * Emits code for checking the return code of a call and rcPassUp, returning
6557 * from the code if either is non-zero.
6558 */
6559DECL_HIDDEN_THROW(uint32_t)
6560iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6561{
6562#ifdef RT_ARCH_AMD64
6563 /*
6564 * AMD64: eax = call status code.
6565 */
6566
6567 /* edx = rcPassUp */
6568 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6569# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6570 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6571# endif
6572
6573 /* edx = eax | rcPassUp */
6574 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6575 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6576 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6577 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6578
6579 /* Jump to non-zero status return path. */
6580 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6581
6582 /* done. */
6583
6584#elif RT_ARCH_ARM64
6585 /*
6586 * ARM64: w0 = call status code.
6587 */
6588# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6589 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6590# endif
6591 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6592
6593 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6594
6595 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6596
6597 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6598 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6599 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6600
6601#else
6602# error "port me"
6603#endif
6604 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6605 RT_NOREF_PV(idxInstr);
6606 return off;
6607}
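/*
 * The emitted sequence corresponds to this C logic (sketch; rcCall stands in
 * for the status code returned by the preceding call):
 */
#if 0
if (((uint32_t)rcCall | (uint32_t)pVCpu->iem.s.rcPassUp) != 0)
    /* branch to the NonZeroRetOrPassUp tail */;
#endif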
6608
6609
6610/**
6611 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6612 * raising a \#GP(0) if it isn't.
6613 *
6614 * @returns New code buffer offset, UINT32_MAX on failure.
6615 * @param pReNative The native recompile state.
6616 * @param off The code buffer offset.
6617 * @param idxAddrReg The host register with the address to check.
6618 * @param idxInstr The current instruction.
6619 */
6620DECL_HIDDEN_THROW(uint32_t)
6621iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6622{
6623 /*
6624 * Make sure we don't have any outstanding guest register writes as we may
6625 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6626 */
6627 off = iemNativeRegFlushPendingWrites(pReNative, off);
6628
6629#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6630 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6631#else
6632 RT_NOREF(idxInstr);
6633#endif
6634
6635#ifdef RT_ARCH_AMD64
6636 /*
6637 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6638 * return raisexcpt();
6639 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6640 */
6641 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6642
6643 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6644 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6645 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6646 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6647 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6648
6649 iemNativeRegFreeTmp(pReNative, iTmpReg);
6650
6651#elif defined(RT_ARCH_ARM64)
6652 /*
6653 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6654 * return raisexcpt();
6655 * ----
6656 * mov x1, 0x800000000000
6657 * add x1, x0, x1
6658 * cmp xzr, x1, lsr 48
6659 * b.ne .Lraisexcpt
6660 */
6661 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6662
6663 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6664 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6665 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6666 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6667
6668 iemNativeRegFreeTmp(pReNative, iTmpReg);
6669
6670#else
6671# error "Port me"
6672#endif
6673 return off;
6674}
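/*
 * Quick plain-C restatement of the canonicality test emitted above, with a few
 * spot checks (illustrative only; the helper name is made up for the sketch):
 */
#if 0
static bool iemNativeSketchIsCanonical(uint64_t uAddr)
{
    return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
}
/* iemNativeSketchIsCanonical(UINT64_C(0x00007fffffffffff)) -> true  (top of the low half)
   iemNativeSketchIsCanonical(UINT64_C(0x0000800000000000)) -> false (first non-canonical address)
   iemNativeSketchIsCanonical(UINT64_C(0xffff800000000000)) -> true  (bottom of the high half) */
#endif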
6675
6676
6677/**
6678 * Emits code to check that the content of @a idxAddrReg is within the limit
6679 * of CS, raising a \#GP(0) if it isn't.
6680 *
6681 * @returns New code buffer offset; throws VBox status code on error.
6682 * @param pReNative The native recompile state.
6683 * @param off The code buffer offset.
6684 * @param idxAddrReg The host register (32-bit) with the address to
6685 * check.
6686 * @param idxInstr The current instruction.
6687 */
6688DECL_HIDDEN_THROW(uint32_t)
6689iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6690 uint8_t idxAddrReg, uint8_t idxInstr)
6691{
6692 /*
6693 * Make sure we don't have any outstanding guest register writes as we may
6694 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
6695 */
6696 off = iemNativeRegFlushPendingWrites(pReNative, off);
6697
6698#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6699 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6700#else
6701 RT_NOREF(idxInstr);
6702#endif
6703
6704 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6705 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6706 kIemNativeGstRegUse_ReadOnly);
6707
6708 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6709 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6710
6711 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6712 return off;
6713}
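/*
 * C-level equivalent of the check emitted above (sketch; uAddr32 stands for the
 * 32-bit address being checked and the CS limit access path is an assumption
 * about the CPUMCTX layout, not taken from this file):
 */
#if 0
if (uAddr32 > pVCpu->cpum.GstCtx.cs.u32Limit)   /* unsigned 'above' -> outside the limit */
    /* raise #GP(0) */;
#endif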
6714
6715
6716/**
6717 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
6718 *
6719 * @returns The flush mask.
6720 * @param fCImpl The IEM_CIMPL_F_XXX flags.
6721 * @param fGstShwFlush The starting flush mask.
6722 */
6723DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
6724{
6725 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
6726 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
6727 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
6728 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
6729 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
6730 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
6731 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
6732 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
6733 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
6734 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
6735 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
6736 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
6737 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
6738 return fGstShwFlush;
6739}
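/*
 * Sketch of how the mask is consumed; this mirrors the pattern in
 * iemNativeEmitCImplCall right below (illustrative only, not compiled):
 */
#if 0
{
    uint64_t const fGstShwFlushAll = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
                                                                               RT_BIT_64(kIemNativeGstReg_Pc)
                                                                               | RT_BIT_64(kIemNativeGstReg_EFlags));
    iemNativeRegFlushGuestShadows(pReNative, fGstShwFlushAll);
}
#endif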
6740
6741
6742/**
6743 * Emits a call to a CImpl function or something similar.
6744 */
6745DECL_HIDDEN_THROW(uint32_t)
6746iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6747 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6748{
6749 /* Writeback everything. */
6750 off = iemNativeRegFlushPendingWrites(pReNative, off);
6751
6752 /*
6753 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6754 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6755 */
6756 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6757 fGstShwFlush
6758 | RT_BIT_64(kIemNativeGstReg_Pc)
6759 | RT_BIT_64(kIemNativeGstReg_EFlags));
6760 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6761
6762 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6763
6764 /*
6765 * Load the parameters.
6766 */
6767#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6768 /* Special-case the hidden VBOXSTRICTRC pointer. */
6769 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6770 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6771 if (cAddParams > 0)
6772 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6773 if (cAddParams > 1)
6774 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6775 if (cAddParams > 2)
6776 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6777 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6778
6779#else
6780 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6781 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6782 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6783 if (cAddParams > 0)
6784 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6785 if (cAddParams > 1)
6786 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6787 if (cAddParams > 2)
6788# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6789 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6790# else
6791 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6792# endif
6793#endif
6794
6795 /*
6796 * Make the call.
6797 */
6798 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6799
6800#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6801 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6802#endif
6803
6804 /*
6805 * Check the status code.
6806 */
6807 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6808}
6809
6810
6811/**
6812 * Emits a call to a threaded worker function.
6813 */
6814DECL_HIDDEN_THROW(uint32_t)
6815iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6816{
6817 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6818 off = iemNativeRegFlushPendingWrites(pReNative, off);
6819
6820 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6821 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6822
6823#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6824 /* The threaded function may throw / long jmp, so set the current instruction
6825 number if we're counting. */
6826 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6827#endif
6828
6829 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6830
6831#ifdef RT_ARCH_AMD64
6832 /* Load the parameters and emit the call. */
6833# ifdef RT_OS_WINDOWS
6834# ifndef VBOXSTRICTRC_STRICT_ENABLED
6835 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6836 if (cParams > 0)
6837 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6838 if (cParams > 1)
6839 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6840 if (cParams > 2)
6841 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6842# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6843 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6844 if (cParams > 0)
6845 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6846 if (cParams > 1)
6847 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6848 if (cParams > 2)
6849 {
6850 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6851 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6852 }
6853 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6854# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6855# else
6856 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6857 if (cParams > 0)
6858 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6859 if (cParams > 1)
6860 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6861 if (cParams > 2)
6862 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6863# endif
6864
6865 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6866
6867# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6868 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6869# endif
6870
6871#elif RT_ARCH_ARM64
6872 /*
6873 * ARM64:
6874 */
6875 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6876 if (cParams > 0)
6877 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6878 if (cParams > 1)
6879 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6880 if (cParams > 2)
6881 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6882
6883 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6884
6885#else
6886# error "port me"
6887#endif
6888
6889 /*
6890 * Check the status code.
6891 */
6892 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6893
6894 return off;
6895}
6896
6897#ifdef VBOX_WITH_STATISTICS
6898/**
6899 * Emits code to update the threaded call statistics.
6900 */
6901DECL_INLINE_THROW(uint32_t)
6902iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6903{
6904 /*
6905 * Update threaded function stats.
6906 */
6907 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6908 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6909# if defined(RT_ARCH_ARM64)
6910 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6911 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6912 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6913 iemNativeRegFreeTmp(pReNative, idxTmp1);
6914 iemNativeRegFreeTmp(pReNative, idxTmp2);
6915# else
6916 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6917# endif
6918 return off;
6919}
6920#endif /* VBOX_WITH_STATISTICS */
6921
6922
6923/**
6924 * Emits the code at the CheckBranchMiss label.
6925 */
6926static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6927{
6928 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6929 if (idxLabel != UINT32_MAX)
6930 {
6931 iemNativeLabelDefine(pReNative, idxLabel, off);
6932
6933 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6934 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6935 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6936
6937 /* jump back to the return sequence. */
6938 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6939 }
6940 return off;
6941}
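/*
 * The tail-label emitters that follow all share this shape (sketch; the label
 * type and helper pointer are placeholders, idxReturnLabel is the parameter the
 * real emitters take): nothing is emitted unless somebody actually referenced
 * the label, and each snippet ends by jumping back to the common return sequence.
 */
#if 0
uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_Xxxx /* placeholder */);
if (idxLabel != UINT32_MAX)
{
    iemNativeLabelDefine(pReNative, idxLabel, off);
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper /* placeholder */);
    off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
}
#endif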
6942
6943
6944/**
6945 * Emits the code at the NeedCsLimChecking label.
6946 */
6947static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6948{
6949 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6950 if (idxLabel != UINT32_MAX)
6951 {
6952 iemNativeLabelDefine(pReNative, idxLabel, off);
6953
6954 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6955 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6956 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6957
6958 /* jump back to the return sequence. */
6959 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6960 }
6961 return off;
6962}
6963
6964
6965/**
6966 * Emits the code at the ObsoleteTb label.
6967 */
6968static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6969{
6970 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6971 if (idxLabel != UINT32_MAX)
6972 {
6973 iemNativeLabelDefine(pReNative, idxLabel, off);
6974
6975 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6976 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6977 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6978
6979 /* jump back to the return sequence. */
6980 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6981 }
6982 return off;
6983}
6984
6985
6986/**
6987 * Emits the code at the RaiseGP0 label.
6988 */
6989static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6990{
6991 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
6992 if (idxLabel != UINT32_MAX)
6993 {
6994 iemNativeLabelDefine(pReNative, idxLabel, off);
6995
6996 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
6997 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6998 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
6999
7000 /* jump back to the return sequence. */
7001 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7002 }
7003 return off;
7004}
7005
7006
7007/**
7008 * Emits the code at the RaiseNm label.
7009 */
7010static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7011{
7012 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
7013 if (idxLabel != UINT32_MAX)
7014 {
7015 iemNativeLabelDefine(pReNative, idxLabel, off);
7016
7017 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
7018 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7019 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
7020
7021 /* jump back to the return sequence. */
7022 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7023 }
7024 return off;
7025}
7026
7027
7028/**
7029 * Emits the code at the RaiseUd label.
7030 */
7031static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7032{
7033 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
7034 if (idxLabel != UINT32_MAX)
7035 {
7036 iemNativeLabelDefine(pReNative, idxLabel, off);
7037
7038 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
7039 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7040 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
7041
7042 /* jump back to the return sequence. */
7043 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7044 }
7045 return off;
7046}
7047
7048
7049/**
7050 * Emits the code at the RaiseMf label.
7051 */
7052static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7053{
7054 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
7055 if (idxLabel != UINT32_MAX)
7056 {
7057 iemNativeLabelDefine(pReNative, idxLabel, off);
7058
7059 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
7060 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7061 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
7062
7063 /* jump back to the return sequence. */
7064 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7065 }
7066 return off;
7067}
7068
7069
7070/**
7071 * Emits the code at the RaiseXf label.
7072 */
7073static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7074{
7075 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
7076 if (idxLabel != UINT32_MAX)
7077 {
7078 iemNativeLabelDefine(pReNative, idxLabel, off);
7079
7080 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
7081 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7082 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
7083
7084 /* jump back to the return sequence. */
7085 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7086 }
7087 return off;
7088}
7089
7090
7091/**
7092 * Emits the code at the ReturnWithFlags label (returns
7093 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7094 */
7095static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7096{
7097 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7098 if (idxLabel != UINT32_MAX)
7099 {
7100 iemNativeLabelDefine(pReNative, idxLabel, off);
7101
7102 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7103
7104 /* jump back to the return sequence. */
7105 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7106 }
7107 return off;
7108}
7109
7110
7111/**
7112 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7113 */
7114static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7115{
7116 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7117 if (idxLabel != UINT32_MAX)
7118 {
7119 iemNativeLabelDefine(pReNative, idxLabel, off);
7120
7121 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7122
7123 /* jump back to the return sequence. */
7124 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7125 }
7126 return off;
7127}
7128
7129
7130/**
7131 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7132 */
7133static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7134{
7135 /*
7136 * Generate the rc + rcPassUp fiddling code if needed.
7137 */
7138 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7139 if (idxLabel != UINT32_MAX)
7140 {
7141 iemNativeLabelDefine(pReNative, idxLabel, off);
7142
7143 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
7144#ifdef RT_ARCH_AMD64
7145# ifdef RT_OS_WINDOWS
7146# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7147 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7148# endif
7149 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7150 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7151# else
7152 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7153 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7154# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7155 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7156# endif
7157# endif
7158# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7159 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7160# endif
7161
7162#else
7163 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7164 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7165 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7166#endif
7167
7168 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7169 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7170 }
7171 return off;
7172}
7173
7174
7175/**
7176 * Emits a standard epilog.
7177 */
7178static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7179{
7180 *pidxReturnLabel = UINT32_MAX;
7181
7182 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7183 off = iemNativeRegFlushPendingWrites(pReNative, off);
7184
7185 /*
7186 * Successful return, so clear the return register (eax, w0).
7187 */
7188 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
7189
7190 /*
7191 * Define label for common return point.
7192 */
7193 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7194 *pidxReturnLabel = idxReturn;
7195
7196 /*
7197 * Restore registers and return.
7198 */
7199#ifdef RT_ARCH_AMD64
7200 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7201
7202 /* Reposition rsp at the r15 restore point. */
7203 pbCodeBuf[off++] = X86_OP_REX_W;
7204 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7205 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7206 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7207
7208 /* Pop non-volatile registers and return */
7209 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7210 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7211 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7212 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7213 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7214 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7215 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7216 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7217# ifdef RT_OS_WINDOWS
7218 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7219 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7220# endif
7221 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7222 pbCodeBuf[off++] = 0xc9; /* leave */
7223 pbCodeBuf[off++] = 0xc3; /* ret */
7224 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7225
7226#elif RT_ARCH_ARM64
7227 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7228
7229 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
7230 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7231 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7232 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7233 IEMNATIVE_FRAME_VAR_SIZE / 8);
7234 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7235 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7236 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7237 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7238 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7239 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7240 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7241 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7242 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7243 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7244 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7245 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7246
7247 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7248 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7249 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7250 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7251
7252 /* retab / ret */
7253# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7254 if (1)
7255 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7256 else
7257# endif
7258 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7259
7260#else
7261# error "port me"
7262#endif
7263 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7264
7265 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7266}
7267
7268
7269/**
7270 * Emits a standard prolog.
7271 */
7272static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7273{
7274#ifdef RT_ARCH_AMD64
7275 /*
7276 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7277 * reserving 64 bytes for stack variables plus 4 non-register argument
7278 * slots. Fixed register assignment: xBX = pReNative;
7279 *
7280 * Since we always do the same register spilling, we can use the same
7281 * unwind description for all the code.
7282 */
7283 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7284 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7285 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7286 pbCodeBuf[off++] = 0x8b;
7287 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7288 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7289 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7290# ifdef RT_OS_WINDOWS
7291 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7292 pbCodeBuf[off++] = 0x8b;
7293 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7294 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7295 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7296# else
7297 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7298 pbCodeBuf[off++] = 0x8b;
7299 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7300# endif
7301 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7302 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7303 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7304 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7305 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7306 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7307 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7308 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7309
7310# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7311 /* Save the frame pointer. */
7312 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7313# endif
7314
7315 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7316 X86_GREG_xSP,
7317 IEMNATIVE_FRAME_ALIGN_SIZE
7318 + IEMNATIVE_FRAME_VAR_SIZE
7319 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7320 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7321 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7322 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7323 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7324
7325#elif RT_ARCH_ARM64
7326 /*
7327 * We set up a stack frame exactly like on x86, only we have to push the
7328 * return address ourselves here. We save all non-volatile registers.
7329 */
7330 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7331
7332 # ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been unable
7333 * to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
7334 * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
7335 * in any way conditional, so we just emit this instruction now and hope for the best... */
7336 /* pacibsp */
7337 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7338# endif
7339
7340 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
7341 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7342 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7343 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7344 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7345 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7346 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7347 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7348 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7349 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7350 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7351 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7352 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7353 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7354 /* Save the BP and LR (ret address) registers at the top of the frame. */
7355 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7356 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7357 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7358 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7359 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7360 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7361
7362 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7363 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7364
7365 /* mov r28, r0 */
7366 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7367 /* mov r27, r1 */
7368 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7369
7370# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7371 /* Save the frame pointer. */
7372 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7373 ARMV8_A64_REG_X2);
7374# endif
7375
7376#else
7377# error "port me"
7378#endif
7379 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7380 return off;
7381}
7382
7383
7384
7385
7386/*********************************************************************************************************************************
7387* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
7388*********************************************************************************************************************************/
7389
7390#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
7391 { \
7392 Assert(pReNative->Core.bmVars == 0); \
7393 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
7394 Assert(pReNative->Core.bmStack == 0); \
7395 pReNative->fMc = (a_fMcFlags); \
7396 pReNative->fCImpl = (a_fCImplFlags); \
7397 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
7398
7399/** We have to get to the end in recompilation mode, as otherwise we won't
7400 * generate code for all the IEM_MC_IF_XXX branches. */
7401#define IEM_MC_END() \
7402 iemNativeVarFreeAll(pReNative); \
7403 } return off
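
/*
 * Rough sketch of usage: the generated recompiler function bodies are plain statement
 * sequences built from these wrappers, e.g. a trivial (hypothetical) block like
 *
 *      IEM_MC_BEGIN(0, 0, IEM_MC_F_64BIT, 0);
 *      IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(cbInstr, VINF_SUCCESS);
 *      IEM_MC_END();
 *
 * expands to: assert that the variable/stack trackers are clean, record the MC and
 * CImpl flags, emit the RIP advance, free all variables and 'return off'.  (The flag
 * value and cbInstr argument here are illustrative only.)
 */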
7404
7405
7406
7407/*********************************************************************************************************************************
7408* Native Emitter Support. *
7409*********************************************************************************************************************************/
7410
7411
7412#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
7413
7414#define IEM_MC_NATIVE_ELSE() } else {
7415
7416#define IEM_MC_NATIVE_ENDIF() } ((void)0)
7417
7418
7419#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
7420 off = a_fnEmitter(pReNative, off)
7421
7422#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
7423 off = a_fnEmitter(pReNative, off, (a0))
7424
7425#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
7426 off = a_fnEmitter(pReNative, off, (a0), (a1))
7427
7428#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
7429 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
7430
7431#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
7432 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
7433
7434#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
7435 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
7436
7437#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
7438 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
7439
7440#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
7441 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
7442
7443#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
7444 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
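
/*
 * Rough sketch of usage (emitter name hypothetical): in an instruction specification a
 * native fast path sits next to the generic IEM_MC fallback like so:
 *
 *      IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *          IEM_MC_NATIVE_EMIT_2(iemNativeEmit_xxx_r_r, idxRegDst, idxRegSrc);
 *      IEM_MC_NATIVE_ELSE()
 *          ... generic IEM_MC statements ...
 *      IEM_MC_NATIVE_ENDIF();
 *
 * IEM_MC_NATIVE_IF only keeps the native branch when the current host architecture
 * (RT_ARCH_VAL) is in the a_fSupportedHosts mask; the RT_ARCH_VAL_* constants are
 * assumed to come from iprt/cdefs.h.
 */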
7445
7446
7447
7448/*********************************************************************************************************************************
7449* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
7450*********************************************************************************************************************************/
7451
7452#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
7453 pReNative->fMc = 0; \
7454 pReNative->fCImpl = (a_fFlags); \
7455 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
7456
7457
7458#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7459 pReNative->fMc = 0; \
7460 pReNative->fCImpl = (a_fFlags); \
7461 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
7462
7463DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7464 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7465 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
7466{
7467 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
7468}
7469
7470
7471#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7472 pReNative->fMc = 0; \
7473 pReNative->fCImpl = (a_fFlags); \
7474 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7475 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
7476
7477DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7478 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7479 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
7480{
7481 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
7482}
7483
7484
7485#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7486 pReNative->fMc = 0; \
7487 pReNative->fCImpl = (a_fFlags); \
7488 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7489 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
7490
7491DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7492 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7493 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
7494 uint64_t uArg2)
7495{
7496 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
7497}
7498
7499
7500
7501/*********************************************************************************************************************************
7502* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
7503*********************************************************************************************************************************/
7504
7505/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
7506 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
7507DECL_INLINE_THROW(uint32_t)
7508iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7509{
7510 /*
7511 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
7512 * return with a special status code and make the execution loop deal with
7513 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
7514 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
7515 * could continue w/o interruption, it probably will drop into the
7516 * debugger, so it is not worth the effort of trying to service it here; we
7517 * just lump it in with the handling of the others.
7518 *
7519 * To simplify the code and the register state management even more (wrt the
7520 * immediate in the AND operation), we always update the flags and skip the
7521 * extra check and its associated conditional jump.
7522 */
7523 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
7524 <= UINT32_MAX);
7525#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7526 AssertMsg( pReNative->idxCurCall == 0
7527 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
7528 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
7529#endif
7530
7531 /*
7532 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
7533 * any pending register writes must be flushed.
7534 */
7535 off = iemNativeRegFlushPendingWrites(pReNative, off);
7536
7537 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7538 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
7539 true /*fSkipLivenessAssert*/);
7540 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
7541 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
7542 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
7543 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
7544 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
7545
7546 /* Free but don't flush the EFLAGS register. */
7547 iemNativeRegFreeTmp(pReNative, idxEflReg);
7548
7549 return off;
7550}
7551
7552
7553/** Emits the a_rcNormal status handling; a dummy (no-op) for the VINF_SUCCESS case. */
7554template<int const a_rcNormal>
7555DECL_FORCE_INLINE(uint32_t)
7556iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7557{
7558 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
7559 if (a_rcNormal != VINF_SUCCESS)
7560 {
7561#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7562 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7563#else
7564 RT_NOREF_PV(idxInstr);
7565#endif
7566
7567 /* As this code returns from the TB any pending register writes must be flushed. */
7568 off = iemNativeRegFlushPendingWrites(pReNative, off);
7569
7570 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
7571 }
7572 return off;
7573}
7574
7575
7576#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
7577 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7578 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7579
7580#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7581 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7582 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7583 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7584
7585/** Same as iemRegAddToRip64AndFinishingNoFlags. */
7586DECL_INLINE_THROW(uint32_t)
7587iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7588{
7589#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7590# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7591 if (!pReNative->Core.offPc)
7592 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7593# endif
7594
7595 /* Allocate a temporary PC register. */
7596 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7597
7598 /* Perform the addition and store the result. */
7599 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
7600 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7601
7602 /* Free but don't flush the PC register. */
7603 iemNativeRegFreeTmp(pReNative, idxPcReg);
7604#endif
7605
7606#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7607 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7608
7609 pReNative->Core.offPc += cbInstr;
7610# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7611 off = iemNativePcAdjustCheck(pReNative, off);
7612# endif
7613 if (pReNative->cCondDepth)
7614 off = iemNativeEmitPcWriteback(pReNative, off);
7615 else
7616 pReNative->Core.cInstrPcUpdateSkipped++;
7617#endif
7618
7619 return off;
7620}
7621
7622
7623#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
7624 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7626
7627#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7628 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7629 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7630 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7631
7632/** Same as iemRegAddToEip32AndFinishingNoFlags. */
7633DECL_INLINE_THROW(uint32_t)
7634iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7635{
7636#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7637# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7638 if (!pReNative->Core.offPc)
7639 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7640# endif
7641
7642 /* Allocate a temporary PC register. */
7643 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7644
7645 /* Perform the addition and store the result. */
7646 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7647 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7648
7649 /* Free but don't flush the PC register. */
7650 iemNativeRegFreeTmp(pReNative, idxPcReg);
7651#endif
7652
7653#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7654 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7655
7656 pReNative->Core.offPc += cbInstr;
7657# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7658 off = iemNativePcAdjustCheck(pReNative, off);
7659# endif
7660 if (pReNative->cCondDepth)
7661 off = iemNativeEmitPcWriteback(pReNative, off);
7662 else
7663 pReNative->Core.cInstrPcUpdateSkipped++;
7664#endif
7665
7666 return off;
7667}
7668
7669
7670#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
7671 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7672 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7673
7674#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7675 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7676 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7677 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7678
7679/** Same as iemRegAddToIp16AndFinishingNoFlags. */
7680DECL_INLINE_THROW(uint32_t)
7681iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7682{
7683#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7684# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7685 if (!pReNative->Core.offPc)
7686 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7687# endif
7688
7689 /* Allocate a temporary PC register. */
7690 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7691
7692 /* Perform the addition and store the result. */
7693 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7694 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7695 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7696
7697 /* Free but don't flush the PC register. */
7698 iemNativeRegFreeTmp(pReNative, idxPcReg);
7699#endif
7700
7701#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7702 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7703
7704 pReNative->Core.offPc += cbInstr;
7705# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7706 off = iemNativePcAdjustCheck(pReNative, off);
7707# endif
7708 if (pReNative->cCondDepth)
7709 off = iemNativeEmitPcWriteback(pReNative, off);
7710 else
7711 pReNative->Core.cInstrPcUpdateSkipped++;
7712#endif
7713
7714 return off;
7715}
7716
7717
7718
7719/*********************************************************************************************************************************
7720* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
7721*********************************************************************************************************************************/
7722
7723#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7724 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7725 (a_enmEffOpSize), pCallEntry->idxInstr); \
7726 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7727
7728#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7729 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7730 (a_enmEffOpSize), pCallEntry->idxInstr); \
7731 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7732 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7733
7734#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
7735 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7736 IEMMODE_16BIT, pCallEntry->idxInstr); \
7737 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7738
7739#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7740 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7741 IEMMODE_16BIT, pCallEntry->idxInstr); \
7742 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7743 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7744
7745#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
7746 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7747 IEMMODE_64BIT, pCallEntry->idxInstr); \
7748 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7749
7750#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7751 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7752 IEMMODE_64BIT, pCallEntry->idxInstr); \
7753 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7754 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7755
7756/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
7757 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
7758 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
7759DECL_INLINE_THROW(uint32_t)
7760iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7761 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7762{
7763 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
7764
7765 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7766 off = iemNativeRegFlushPendingWrites(pReNative, off);
7767
7768#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7769 Assert(pReNative->Core.offPc == 0);
7770
7771 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7772#endif
7773
7774 /* Allocate a temporary PC register. */
7775 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7776
7777 /* Perform the addition. */
7778 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
7779
7780 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
7781 {
7782 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
7783 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7784 }
7785 else
7786 {
7787 /* Just truncate the result to 16-bit IP. */
7788 Assert(enmEffOpSize == IEMMODE_16BIT);
7789 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7790 }
7791 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7792
7793 /* Free but don't flush the PC register. */
7794 iemNativeRegFreeTmp(pReNative, idxPcReg);
7795
7796 return off;
7797}
7798
7799
7800#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7801 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7802 (a_enmEffOpSize), pCallEntry->idxInstr); \
7803 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7804
7805#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7806 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7807 (a_enmEffOpSize), pCallEntry->idxInstr); \
7808 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7809 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7810
7811#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
7812 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7813 IEMMODE_16BIT, pCallEntry->idxInstr); \
7814 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7815
7816#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7817 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7818 IEMMODE_16BIT, pCallEntry->idxInstr); \
7819 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7820 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7821
7822#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
7823 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7824 IEMMODE_32BIT, pCallEntry->idxInstr); \
7825 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7826
7827#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7828 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7829 IEMMODE_32BIT, pCallEntry->idxInstr); \
7830 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7831 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7832
7833/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
7834 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
7835 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
7836DECL_INLINE_THROW(uint32_t)
7837iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7838 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7839{
7840 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
7841
7842 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7843 off = iemNativeRegFlushPendingWrites(pReNative, off);
7844
7845#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7846 Assert(pReNative->Core.offPc == 0);
7847
7848 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7849#endif
7850
7851 /* Allocate a temporary PC register. */
7852 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7853
7854 /* Perform the addition. */
7855 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7856
7857 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
7858 if (enmEffOpSize == IEMMODE_16BIT)
7859 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7860
7861 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
7862/** @todo we can skip this in 32-bit FLAT mode. */
7863 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7864
7865 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7866
7867 /* Free but don't flush the PC register. */
7868 iemNativeRegFreeTmp(pReNative, idxPcReg);
7869
7870 return off;
7871}
7872
7873
7874#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
7875 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7876 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7877
7878#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
7879 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7880 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7881 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7882
7883#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
7884 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7885 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7886
7887#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7888 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7889 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7890 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7891
7892#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
7893 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7894 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7895
7896#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7897 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7898 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7899 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7900
7901/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
7902DECL_INLINE_THROW(uint32_t)
7903iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7904 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
7905{
7906 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7907 off = iemNativeRegFlushPendingWrites(pReNative, off);
7908
7909#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7910 Assert(pReNative->Core.offPc == 0);
7911
7912 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7913#endif
7914
7915 /* Allocate a temporary PC register. */
7916 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7917
7918 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
7919 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7920 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7921 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7922 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7923
7924 /* Free but don't flush the PC register. */
7925 iemNativeRegFreeTmp(pReNative, idxPcReg);
7926
7927 return off;
7928}
7929
7930
7931
7932/*********************************************************************************************************************************
7933* Emitters for changing PC/RIP/EIP/IP with a indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
7934*********************************************************************************************************************************/
7935
7936/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
7937#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
7938 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7939
7940/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
7941#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
7942 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7943
7944/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
7945#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
7946 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7947
7948/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
7949 * clears flags. */
7950#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
7951 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
7952 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7953
7954/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
7955 * clears flags. */
7956#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
7957 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
7958 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7959
7960/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
7961 * clears flags. */
7962#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
7963 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
7964 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7965
7966#undef IEM_MC_SET_RIP_U16_AND_FINISH
7967
7968
7969/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
7970#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
7971 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7972
7973/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
7974#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
7975 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7976
7977/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
7978 * clears flags. */
7979#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
7980 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
7981 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7982
7983/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
7984 * and clears flags. */
7985#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
7986 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
7987 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7988
7989#undef IEM_MC_SET_RIP_U32_AND_FINISH
7990
7991
7992/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
7993#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
7994 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
7995
7996/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
7997 * and clears flags. */
7998#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
7999 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
8000 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8001
8002#undef IEM_MC_SET_RIP_U64_AND_FINISH
8003
8004
8005/** Same as iemRegRipJumpU16AndFinishNoFlags,
8006 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
8007DECL_INLINE_THROW(uint32_t)
8008iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
8009 uint8_t idxInstr, uint8_t cbVar)
8010{
8011 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
8012 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
8013
8014 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
8015 off = iemNativeRegFlushPendingWrites(pReNative, off);
8016
8017#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8018 Assert(pReNative->Core.offPc == 0);
8019
8020 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
8021#endif
8022
8023 /* Get a register with the new PC loaded from idxVarPc.
8024 Note! This ASSUMES that the high bits of the GPR are zeroed. */
8025 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
8026
8027 /* Check limit (may #GP(0) + exit TB). */
8028 if (!f64Bit)
8029/** @todo we can skip this test in FLAT 32-bit mode. */
8030 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
8031 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
8032 else if (cbVar > sizeof(uint32_t))
8033 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
8034
8035 /* Store the result. */
8036 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
8037
8038 iemNativeVarRegisterRelease(pReNative, idxVarPc);
8039 /** @todo implicitly free the variable? */
8040
8041 return off;
8042}
8043
8044
8045
8046/*********************************************************************************************************************************
8047* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
8048*********************************************************************************************************************************/
8049
8050#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
8051 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
8052
8053/**
8054 * Emits code to check if a \#NM exception should be raised.
8055 *
8056 * @returns New code buffer offset, UINT32_MAX on failure.
8057 * @param pReNative The native recompile state.
8058 * @param off The code buffer offset.
8059 * @param idxInstr The current instruction.
8060 */
8061DECL_INLINE_THROW(uint32_t)
8062iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8063{
8064 /*
8065 * Make sure we don't have any outstanding guest register writes as we may
8066 * raise an #NM and all guest registers must be up to date in CPUMCTX.
8067 *
8068 * @todo r=aeichner Can we postpone this to the RaiseNm path?
8069 */
8070 off = iemNativeRegFlushPendingWrites(pReNative, off);
8071
8072#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8073 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8074#else
8075 RT_NOREF(idxInstr);
8076#endif
8077
8078 /* Allocate a temporary CR0 register. */
8079 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8080 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8081
8082 /*
8083 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
8084 * return raisexcpt();
8085 */
8086 /* Test and jump. */
8087 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
8088
8089 /* Free but don't flush the CR0 register. */
8090 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8091
8092 return off;
8093}
8094
8095
8096#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
8097 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
8098
8099/**
8100 * Emits code to check if a \#MF exception should be raised.
8101 *
8102 * @returns New code buffer offset, UINT32_MAX on failure.
8103 * @param pReNative The native recompile state.
8104 * @param off The code buffer offset.
8105 * @param idxInstr The current instruction.
8106 */
8107DECL_INLINE_THROW(uint32_t)
8108iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8109{
8110 /*
8111 * Make sure we don't have any outstanding guest register writes as we may
8112 * raise an #MF and all guest registers must be up to date in CPUMCTX.
8113 *
8114 * @todo r=aeichner Can we postpone this to the RaiseMf path?
8115 */
8116 off = iemNativeRegFlushPendingWrites(pReNative, off);
8117
8118#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8119 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8120#else
8121 RT_NOREF(idxInstr);
8122#endif
8123
8124 /* Allocate a temporary FSW register. */
8125 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
8126 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
8127
8128 /*
8129 * if ((FSW & X86_FSW_ES) != 0)
8130 * return raisexcpt();
8131 */
8132 /* Test and jump. */
8133 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
8134
8135 /* Free but don't flush the FSW register. */
8136 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
8137
8138 return off;
8139}
8140
8141
8142#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
8143 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8144
8145/**
8146 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
8147 *
8148 * @returns New code buffer offset, UINT32_MAX on failure.
8149 * @param pReNative The native recompile state.
8150 * @param off The code buffer offset.
8151 * @param idxInstr The current instruction.
8152 */
8153DECL_INLINE_THROW(uint32_t)
8154iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8155{
8156 /*
8157 * Make sure we don't have any outstanding guest register writes as we may
8158 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8159 *
8160 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8161 */
8162 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8163
8164#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8165 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8166#else
8167 RT_NOREF(idxInstr);
8168#endif
8169
8170 /* Allocate a temporary CR0 and CR4 register. */
8171 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8172 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8173 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8174 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8175
8176 /** @todo r=aeichner Optimize this more later to have less compares and branches,
8177 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
8178 * actual performance benefit first). */
8179 /*
8180 * if (cr0 & X86_CR0_EM)
8181 * return raisexcpt();
8182 */
8183 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_EM_BIT, idxLabelRaiseUd);
8184 /*
8185 * if (!(cr4 & X86_CR4_OSFXSR))
8186 * return raisexcpt();
8187 */
8188 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR_BIT, idxLabelRaiseUd);
8189 /*
8190 * if (cr0 & X86_CR0_TS)
8191 * return raisexcpt();
8192 */
8193 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8194
8195 /* Free but don't flush the CR0 and CR4 register. */
8196 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8197 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8198
8199 return off;
8200}
8201
8202
8203#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
8204 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8205
8206/**
8207 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
8208 *
8209 * @returns New code buffer offset, UINT32_MAX on failure.
8210 * @param pReNative The native recompile state.
8211 * @param off The code buffer offset.
8212 * @param idxInstr The current instruction.
8213 */
8214DECL_INLINE_THROW(uint32_t)
8215iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8216{
8217 /*
8218 * Make sure we don't have any outstanding guest register writes as we may
8219 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8220 *
8221 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8222 */
8223 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8224
8225#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8226 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8227#else
8228 RT_NOREF(idxInstr);
8229#endif
8230
8231 /* Allocate a temporary CR0, CR4 and XCR0 register. */
8232 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8233 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8234 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
8235 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8236 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8237
8238 /** @todo r=aeichner Optimize this more later to have less compares and branches,
8239 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h but check that it has some
8240 * actual performance benefit first). */
8241 /*
8242 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
8243 * return raisexcpt();
8244 */
8245 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
8246 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
8247 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
8248 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8249
8250 /*
8251 * if (!(cr4 & X86_CR4_OSXSAVE))
8252 * return raisexcpt();
8253 */
8254 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT, idxLabelRaiseUd);
8255 /*
8256 * if (cr0 & X86_CR0_TS)
8257 * return raisexcpt();
8258 */
8259 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8260
8261 /* Free but don't flush the CR0, CR4 and XCR0 register. */
8262 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8263 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8264 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
8265
8266 return off;
8267}
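
/*
 * Illustrative note on the XCR0 test above (bit values per the XSAVE
 * architecture, not taken from this file): XSAVE_C_SSE is bit 1 and
 * XSAVE_C_YMM is bit 2 of XCR0, so the emitted sequence boils down to:
 *
 *      tmp  = XSAVE_C_YMM | XSAVE_C_SSE;    // 0x6
 *      tmp &= xcr0;
 *      if (tmp != (XSAVE_C_YMM | XSAVE_C_SSE))
 *          raise #UD;                       // OS hasn't enabled both SSE and YMM state
 */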
8268
8269
8270#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
8271 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
8272
8273/**
8274 * Emits code to raise a SIMD floating-point exception (either \#UD or \#XF).
8275 *
8276 * @returns New code buffer offset, UINT32_MAX on failure.
8277 * @param pReNative The native recompile state.
8278 * @param off The code buffer offset.
8279 * @param idxInstr The current instruction.
8280 */
8281DECL_INLINE_THROW(uint32_t)
8282iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8283{
8284 /*
8285 * Make sure we don't have any outstanding guest register writes as we may
8286 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
8287 *
8288 * @todo r=aeichner Can we postpone this to the RaiseXf/RaiseUd path?
8289 */
8290 off = iemNativeRegFlushPendingWrites(pReNative, off);
8291
8292#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8293 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8294#else
8295 RT_NOREF(idxInstr);
8296#endif
8297
8298 /* Allocate a temporary CR4 register. */
8299 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8300 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
8301 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8302
8303 /*
8304 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
8305 * return raisexcpt();
8306 */
8307 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseUd);
8308
8309 /* Otherwise raise the \#XF exception unconditionally. */
8310 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseXf);
8311
8312 /* Free but don't flush the CR4 register. */
8313 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8314
8315 return off;
8316}
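
/*
 * For reference, the guest visible behaviour this emitter corresponds to is
 * roughly (a sketch of the architectural rule, not the authoritative IEMMc.h
 * definition):
 *
 *      if (cr4 & X86_CR4_OSXMMEEXCPT)
 *          raise #XF;      // the OS handles unmasked SIMD FP exceptions
 *      else
 *          raise #UD;
 */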
8317
8318
8319
8320/*********************************************************************************************************************************
8321* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
8322*********************************************************************************************************************************/
8323
8324/**
8325 * Pushes an IEM_MC_IF_XXX onto the condition stack.
8326 *
8327 * @returns Pointer to the condition stack entry on success.
8328 * @throws  VERR_IEM_COND_TOO_DEEPLY_NESTED if there are too many nestings.
8329 */
8330DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
8331{
8332#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8333 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
8334#endif
8335
8336 uint32_t const idxStack = pReNative->cCondDepth;
8337 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
8338
8339 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
8340 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
8341
8342 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
8343 pEntry->fInElse = false;
8344 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
8345 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
8346
8347 return pEntry;
8348}
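
/*
 * Rough shape of the code generated for one IEM_MC_IF_XXX / IEM_MC_ELSE /
 * IEM_MC_ENDIF triplet (a sketch; the real labels carry the uCondSeqNo):
 *
 *          ...condition test...
 *          jcc     l_else          ; emitted by the IEM_MC_IF_XXX worker
 *          ...if-block...
 *          jmp     l_endif         ; iemNativeEmitElse
 *      l_else:
 *          ...else-block...
 *      l_endif:                    ; iemNativeEmitEndIf
 *
 * Without an IEM_MC_ELSE both labels end up defined at the same offset.
 */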
8349
8350
8351/**
8352 * Start of the if-block, snapshotting the register and variable state.
8353 */
8354DECL_INLINE_THROW(void)
8355iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
8356{
8357 Assert(offIfBlock != UINT32_MAX);
8358 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8359 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8360 Assert(!pEntry->fInElse);
8361
8362 /* Define the start of the IF block if requested or for disassembly purposes. */
8363 if (idxLabelIf != UINT32_MAX)
8364 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
8365#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8366 else
8367 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
8368#else
8369 RT_NOREF(offIfBlock);
8370#endif
8371
8372#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8373 Assert(pReNative->Core.offPc == 0);
8374#endif
8375
8376 /* Copy the initial state so we can restore it in the 'else' block. */
8377 pEntry->InitialState = pReNative->Core;
8378}
8379
8380
8381#define IEM_MC_ELSE() } while (0); \
8382 off = iemNativeEmitElse(pReNative, off); \
8383 do {
8384
8385/** Emits code related to IEM_MC_ELSE. */
8386DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8387{
8388 /* Check sanity and get the conditional stack entry. */
8389 Assert(off != UINT32_MAX);
8390 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8391 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8392 Assert(!pEntry->fInElse);
8393
8394 /* Jump to the endif */
8395 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
8396
8397 /* Define the else label and enter the else part of the condition. */
8398 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8399 pEntry->fInElse = true;
8400
8401#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8402 Assert(pReNative->Core.offPc == 0);
8403#endif
8404
8405 /* Snapshot the core state so we can do a merge at the endif and restore
8406 the snapshot we took at the start of the if-block. */
8407 pEntry->IfFinalState = pReNative->Core;
8408 pReNative->Core = pEntry->InitialState;
8409
8410 return off;
8411}
8412
8413
8414#define IEM_MC_ENDIF() } while (0); \
8415 off = iemNativeEmitEndIf(pReNative, off)
8416
8417/** Emits code related to IEM_MC_ENDIF. */
8418DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8419{
8420 /* Check sanity and get the conditional stack entry. */
8421 Assert(off != UINT32_MAX);
8422 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8423 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8424
8425#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8426 Assert(pReNative->Core.offPc == 0);
8427#endif
8428
8429 /*
8430 * Now we have to find common ground with the core state at the end of the
8431 * if-block. Use the smallest common denominator and just drop anything
8432 * that isn't the same in both states.
8433 */
8434 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
8435 * which is why we're doing this at the end of the else-block.
8436 * But we'd need more info about the future for that to be worth the effort. */
8437 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
8438 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
8439 {
8440 /* shadow guest stuff first. */
8441 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
8442 if (fGstRegs)
8443 {
8444 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
8445 do
8446 {
8447 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
8448 fGstRegs &= ~RT_BIT_64(idxGstReg);
8449
8450 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
8451 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
8452 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
8453 {
8454 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
8455 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
8456 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
8457 }
8458 } while (fGstRegs);
8459 }
8460 else
8461 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
8462
8463 /* Check variables next. For now we must require them to be identical
8464 or stuff we can recreate. */
8465 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
8466 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
8467 if (fVars)
8468 {
8469 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
8470 do
8471 {
8472 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
8473 fVars &= ~RT_BIT_32(idxVar);
8474
8475 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
8476 {
8477 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
8478 continue;
8479 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
8480 {
8481 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8482 if (idxHstReg != UINT8_MAX)
8483 {
8484 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8485 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8486 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
8487 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8488 }
8489 continue;
8490 }
8491 }
8492 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
8493 continue;
8494
8495 /* Irreconcilable, so drop it. */
8496 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8497 if (idxHstReg != UINT8_MAX)
8498 {
8499 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8500 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8501 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
8502 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8503 }
8504 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8505 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8506 } while (fVars);
8507 }
8508
8509 /* Finally, check that the host register allocations matches. */
8510 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
8511 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
8512 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
8513 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
8514 }
8515
8516 /*
8517 * Define the endif label and maybe the else one if we're still in the 'if' part.
8518 */
8519 if (!pEntry->fInElse)
8520 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8521 else
8522 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
8523 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
8524
8525 /* Pop the conditional stack. */
8526 pReNative->cCondDepth -= 1;
8527
8528 return off;
8529}
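
/*
 * Hypothetical reconciliation example for the above: if the if-block loaded
 * guest RAX into a host register but the else-block didn't, the shadow copy is
 * simply dropped here; an immediate/const variable whose host register differs
 * between the two paths just loses the register (it can be rematerialized),
 * while a stack variable with diverging register assignments is dropped
 * entirely.  Anything that still doesn't match up afterwards - i.e. differing
 * bmHstRegs - raises VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED.
 */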
8530
8531
8532#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
8533 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
8534 do {
8535
8536/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
8537DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8538{
8539 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8540
8541 /* Get the eflags. */
8542 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8543 kIemNativeGstRegUse_ReadOnly);
8544
8545 /* Test and jump. */
8546 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8547
8548 /* Free but don't flush the EFlags register. */
8549 iemNativeRegFreeTmp(pReNative, idxEflReg);
8550
8551 /* Make a copy of the core state now as we start the if-block. */
8552 iemNativeCondStartIfBlock(pReNative, off);
8553
8554 return off;
8555}
8556
8557
8558#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
8559 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
8560 do {
8561
8562/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
8563DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8564{
8565 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8566
8567 /* Get the eflags. */
8568 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8569 kIemNativeGstRegUse_ReadOnly);
8570
8571 /* Test and jump. */
8572 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8573
8574 /* Free but don't flush the EFlags register. */
8575 iemNativeRegFreeTmp(pReNative, idxEflReg);
8576
8577 /* Make a copy of the core state now as we start the if-block. */
8578 iemNativeCondStartIfBlock(pReNative, off);
8579
8580 return off;
8581}
8582
8583
8584#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
8585 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
8586 do {
8587
8588/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
8589DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8590{
8591 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8592
8593 /* Get the eflags. */
8594 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8595 kIemNativeGstRegUse_ReadOnly);
8596
8597 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8598 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8599
8600 /* Test and jump. */
8601 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8602
8603 /* Free but don't flush the EFlags register. */
8604 iemNativeRegFreeTmp(pReNative, idxEflReg);
8605
8606 /* Make a copy of the core state now as we start the if-block. */
8607 iemNativeCondStartIfBlock(pReNative, off);
8608
8609 return off;
8610}
8611
8612
8613#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
8614 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
8615 do {
8616
8617/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
8618DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8619{
8620 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8621
8622 /* Get the eflags. */
8623 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8624 kIemNativeGstRegUse_ReadOnly);
8625
8626 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8627 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8628
8629 /* Test and jump. */
8630 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8631
8632 /* Free but don't flush the EFlags register. */
8633 iemNativeRegFreeTmp(pReNative, idxEflReg);
8634
8635 /* Make a copy of the core state now as we start the if-block. */
8636 iemNativeCondStartIfBlock(pReNative, off);
8637
8638 return off;
8639}
8640
8641
8642#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
8643 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
8644 do {
8645
8646#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
8647 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
8648 do {
8649
8650/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
8651DECL_INLINE_THROW(uint32_t)
8652iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8653 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8654{
8655 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8656
8657 /* Get the eflags. */
8658 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8659 kIemNativeGstRegUse_ReadOnly);
8660
8661 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8662 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8663
8664 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8665 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8666 Assert(iBitNo1 != iBitNo2);
8667
8668#ifdef RT_ARCH_AMD64
8669 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
8670
8671 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8672 if (iBitNo1 > iBitNo2)
8673 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8674 else
8675 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8676 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8677
8678#elif defined(RT_ARCH_ARM64)
8679 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8680 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8681
8682 /* and tmpreg, eflreg, #1<<iBitNo1 */
8683 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8684
8685 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8686 if (iBitNo1 > iBitNo2)
8687 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8688 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8689 else
8690 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8691 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8692
8693 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8694
8695#else
8696# error "Port me"
8697#endif
8698
8699 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8700 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8701 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8702
8703 /* Free but don't flush the EFlags and tmp registers. */
8704 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8705 iemNativeRegFreeTmp(pReNative, idxEflReg);
8706
8707 /* Make a copy of the core state now as we start the if-block. */
8708 iemNativeCondStartIfBlock(pReNative, off);
8709
8710 return off;
8711}
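
/*
 * Worked example of the bit trick above (hypothetical operands): with
 * a_fBit1 = X86_EFL_OF (bit 11) and a_fBit2 = X86_EFL_SF (bit 7) we get:
 *
 *      tmp  = efl & RT_BIT_32(11);     // isolate OF
 *      tmp >>= 11 - 7;                 // move it down to bit 7
 *      tmp ^= efl;                     // bit 7 of tmp is now OF ^ SF
 *
 * so testing bit 7 of tmp tells whether the two flags differ, which is what the
 * final conditional jump keys on (the other bits of tmp are don't-cares).
 */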
8712
8713
8714#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
8715 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
8716 do {
8717
8718#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
8719 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
8720 do {
8721
8722/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
8723 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
8724DECL_INLINE_THROW(uint32_t)
8725iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
8726 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8727{
8728 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8729
8730 /* We need an if-block label for the inverted variant. */
8731 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
8732 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
8733
8734 /* Get the eflags. */
8735 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8736 kIemNativeGstRegUse_ReadOnly);
8737
8738 /* Translate the flag masks to bit numbers. */
8739 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8740 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8741
8742 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8743 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8744 Assert(iBitNo1 != iBitNo);
8745
8746 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8747 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8748 Assert(iBitNo2 != iBitNo);
8749 Assert(iBitNo2 != iBitNo1);
8750
8751#ifdef RT_ARCH_AMD64
8752 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
8753#elif defined(RT_ARCH_ARM64)
8754 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8755#endif
8756
8757 /* Check for the lone bit first. */
8758 if (!fInverted)
8759 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8760 else
8761 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
8762
8763 /* Then extract and compare the other two bits. */
8764#ifdef RT_ARCH_AMD64
8765 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8766 if (iBitNo1 > iBitNo2)
8767 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8768 else
8769 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8770 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8771
8772#elif defined(RT_ARCH_ARM64)
8773 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8774
8775 /* and tmpreg, eflreg, #1<<iBitNo1 */
8776 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8777
8778 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8779 if (iBitNo1 > iBitNo2)
8780 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8781 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8782 else
8783 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8784 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8785
8786 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8787
8788#else
8789# error "Port me"
8790#endif
8791
8792 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8793 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8794 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8795
8796 /* Free but don't flush the EFlags and tmp registers. */
8797 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8798 iemNativeRegFreeTmp(pReNative, idxEflReg);
8799
8800 /* Make a copy of the core state now as we start the if-block. */
8801 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
8802
8803 return off;
8804}
8805
8806
8807#define IEM_MC_IF_CX_IS_NZ() \
8808 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
8809 do {
8810
8811/** Emits code for IEM_MC_IF_CX_IS_NZ. */
8812DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8813{
8814 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8815
8816 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8817 kIemNativeGstRegUse_ReadOnly);
8818 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
8819 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8820
8821 iemNativeCondStartIfBlock(pReNative, off);
8822 return off;
8823}
8824
8825
8826#define IEM_MC_IF_ECX_IS_NZ() \
8827 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
8828 do {
8829
8830#define IEM_MC_IF_RCX_IS_NZ() \
8831 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
8832 do {
8833
8834/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
8835DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8836{
8837 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8838
8839 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8840 kIemNativeGstRegUse_ReadOnly);
8841 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
8842 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8843
8844 iemNativeCondStartIfBlock(pReNative, off);
8845 return off;
8846}
8847
8848
8849#define IEM_MC_IF_CX_IS_NOT_ONE() \
8850 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
8851 do {
8852
8853/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
8854DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8855{
8856 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8857
8858 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8859 kIemNativeGstRegUse_ReadOnly);
8860#ifdef RT_ARCH_AMD64
8861 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8862#else
8863 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8864 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8865 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8866#endif
8867 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8868
8869 iemNativeCondStartIfBlock(pReNative, off);
8870 return off;
8871}
8872
8873
8874#define IEM_MC_IF_ECX_IS_NOT_ONE() \
8875 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
8876 do {
8877
8878#define IEM_MC_IF_RCX_IS_NOT_ONE() \
8879 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
8880 do {
8881
8882/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
8883DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8884{
8885 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8886
8887 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8888 kIemNativeGstRegUse_ReadOnly);
8889 if (f64Bit)
8890 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8891 else
8892 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8893 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8894
8895 iemNativeCondStartIfBlock(pReNative, off);
8896 return off;
8897}
8898
8899
8900#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8901 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
8902 do {
8903
8904#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8905 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
8906 do {
8907
8908/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
8909 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8910DECL_INLINE_THROW(uint32_t)
8911iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
8912{
8913 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8914
8915 /* We have to load both RCX and EFLAGS before we can start branching,
8916 otherwise we'll end up in the else-block with an inconsistent
8917 register allocator state.
8918 Doing EFLAGS first as it's more likely to be loaded, right? */
8919 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8920 kIemNativeGstRegUse_ReadOnly);
8921 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8922 kIemNativeGstRegUse_ReadOnly);
8923
8924 /** @todo we could reduce this to a single branch instruction by spending a
8925 * temporary register and some setnz stuff. Not sure if loops are
8926 * worth it. */
8927 /* Check CX. */
8928#ifdef RT_ARCH_AMD64
8929 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8930#else
8931 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8932 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8933 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8934#endif
8935
8936 /* Check the EFlags bit. */
8937 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8938 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8939 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8940 !fCheckIfSet /*fJmpIfSet*/);
8941
8942 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8943 iemNativeRegFreeTmp(pReNative, idxEflReg);
8944
8945 iemNativeCondStartIfBlock(pReNative, off);
8946 return off;
8947}
8948
8949
8950#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8951 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
8952 do {
8953
8954#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8955 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
8956 do {
8957
8958#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8959 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
8960 do {
8961
8962#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8963 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
8964 do {
8965
8966/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
8967 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
8968 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
8969 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8970DECL_INLINE_THROW(uint32_t)
8971iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8972 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
8973{
8974 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8975
8976 /* We have to load both RCX and EFLAGS before we can start branching,
8977 otherwise we'll end up in the else-block with an inconsistent
8978 register allocator state.
8979 Doing EFLAGS first as it's more likely to be loaded, right? */
8980 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8981 kIemNativeGstRegUse_ReadOnly);
8982 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8983 kIemNativeGstRegUse_ReadOnly);
8984
8985 /** @todo we could reduce this to a single branch instruction by spending a
8986 * temporary register and some setnz stuff. Not sure if loops are
8987 * worth it. */
8988 /* Check RCX/ECX. */
8989 if (f64Bit)
8990 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8991 else
8992 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8993
8994 /* Check the EFlags bit. */
8995 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8996 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8997 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8998 !fCheckIfSet /*fJmpIfSet*/);
8999
9000 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
9001 iemNativeRegFreeTmp(pReNative, idxEflReg);
9002
9003 iemNativeCondStartIfBlock(pReNative, off);
9004 return off;
9005}
9006
9007
9008
9009/*********************************************************************************************************************************
9010* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
9011*********************************************************************************************************************************/
9012/** Number of hidden arguments for CIMPL calls.
9013 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
9014#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9015# define IEM_CIMPL_HIDDEN_ARGS 3
9016#else
9017# define IEM_CIMPL_HIDDEN_ARGS 2
9018#endif
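
/*
 * Background sketch (not lifted from the CIMPL headers): the two always-hidden
 * arguments are the pVCpu pointer and the instruction length (cbInstr) which
 * every C-implementation worker takes before the explicit IEM_MC_ARG values.
 * On Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED, VBOXSTRICTRC is a class
 * returned via a hidden return-buffer pointer, which is presumably what the
 * third hidden argument accounts for.
 */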
9019
9020#define IEM_MC_NOREF(a_Name) \
9021 RT_NOREF_PV(a_Name)
9022
9023#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
9024 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
9025
9026#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
9027 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
9028
9029#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
9030 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
9031
9032#define IEM_MC_LOCAL(a_Type, a_Name) \
9033 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
9034
9035#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
9036 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
9037
9038
9039/**
9040 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
9041 */
9042DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
9043{
9044 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
9045 return IEM_CIMPL_HIDDEN_ARGS;
9046 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
9047 return 1;
9048 return 0;
9049}
9050
9051
9052/**
9053 * Internal work that allocates a variable with kind set to
9054 * kIemNativeVarKind_Invalid and no current stack allocation.
9055 *
9056 * The kind will either be set by the caller or later when the variable is first
9057 * assigned a value.
9058 *
9059 * @returns Unpacked index.
9060 * @internal
9061 */
9062static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9063{
9064 Assert(cbType > 0 && cbType <= 64);
9065 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
9066 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
9067 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
9068 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9069 pReNative->Core.aVars[idxVar].cbVar = cbType;
9070 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9071 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9072 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
9073 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
9074 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
9075 pReNative->Core.aVars[idxVar].fRegAcquired = false;
9076 pReNative->Core.aVars[idxVar].u.uValue = 0;
9077 return idxVar;
9078}
9079
9080
9081/**
9082 * Internal work that allocates an argument variable w/o setting enmKind.
9083 *
9084 * @returns Unpacked index.
9085 * @internal
9086 */
9087static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9088{
9089 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
9090 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9091 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
9092
9093 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
9094 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
9095 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
9096 return idxVar;
9097}
9098
9099
9100/**
9101 * Gets the stack slot for a stack variable, allocating one if necessary.
9102 *
9103 * Calling this function implies that the stack slot will contain a valid
9104 * variable value. The caller deals with any register currently assigned to the
9105 * variable, typically by spilling it into the stack slot.
9106 *
9107 * @returns The stack slot number.
9108 * @param pReNative The recompiler state.
9109 * @param idxVar The variable.
9110 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
9111 */
9112DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9113{
9114 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9115 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9116 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9117
9118 /* Already got a slot? */
9119 uint8_t const idxStackSlot = pVar->idxStackSlot;
9120 if (idxStackSlot != UINT8_MAX)
9121 {
9122 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
9123 return idxStackSlot;
9124 }
9125
9126 /*
9127 * A single slot is easy to allocate.
9128 * Allocate them from the top end, closest to BP, to reduce the displacement.
9129 */
9130 if (pVar->cbVar <= sizeof(uint64_t))
9131 {
9132 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9133 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9134 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
9135 pVar->idxStackSlot = (uint8_t)iSlot;
9136 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
9137 return (uint8_t)iSlot;
9138 }
9139
9140 /*
9141 * We need more than one stack slot.
9142 *
9143 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
9144 */
9145 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
9146 Assert(pVar->cbVar <= 64);
9147 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
9148 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
9149 uint32_t bmStack = ~pReNative->Core.bmStack;
9150 while (bmStack != UINT32_MAX)
9151 {
9152/** @todo allocate from the top to reduce BP displacement. */
9153 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
9154 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9155 if (!(iSlot & fBitAlignMask))
9156 {
9157 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
9158 {
9159 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
9160 pVar->idxStackSlot = (uint8_t)iSlot;
9161 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9162 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
9163 return (uint8_t)iSlot;
9164 }
9165 }
9166 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
9167 }
9168 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9169}
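
/*
 * Worked example for the multi-slot path above: a hypothetical 32 byte variable
 * gets fBitAlignMask = 3 and fBitAllocMask = 0xf, so the search only accepts
 * slot indexes that are a multiple of 4 and claims four consecutive 8-byte
 * slots (e.g. bmStack |= 0x00f0 for iSlot = 4).  A 16 byte variable gets
 * fBitAlignMask = 1 and fBitAllocMask = 0x3, i.e. an aligned pair of slots.
 */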
9170
9171
9172/**
9173 * Changes the variable to a stack variable.
9174 *
9175 * Currently this is only possible to do the first time the variable is used;
9176 * switching later could be implemented but hasn't been done.
9177 *
9178 * @param pReNative The recompiler state.
9179 * @param idxVar The variable.
9180 * @throws VERR_IEM_VAR_IPE_2
9181 */
9182static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9183{
9184 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9185 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9186 if (pVar->enmKind != kIemNativeVarKind_Stack)
9187 {
9188 /* We could in theory transition from immediate to stack as well, but it
9189 would involve the caller doing work storing the value on the stack. So,
9190 till that's required we only allow transition from invalid. */
9191 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9192 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9193 pVar->enmKind = kIemNativeVarKind_Stack;
9194
9195 /* Note! We don't allocate a stack slot here, that's only done when a
9196 slot is actually needed to hold a variable value. */
9197 }
9198}
9199
9200
9201/**
9202 * Sets the variable to a constant (immediate) value.
9203 *
9204 * This does not require stack storage as we know the value and can always
9205 * reload it, unless of course it's referenced.
9206 *
9207 * @param pReNative The recompiler state.
9208 * @param idxVar The variable.
9209 * @param uValue The immediate value.
9210 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9211 */
9212static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
9213{
9214 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9215 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9216 if (pVar->enmKind != kIemNativeVarKind_Immediate)
9217 {
9218 /* Only simple transitions for now. */
9219 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9220 pVar->enmKind = kIemNativeVarKind_Immediate;
9221 }
9222 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9223
9224 pVar->u.uValue = uValue;
9225 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
9226 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
9227 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
9228}
9229
9230
9231/**
9232 * Sets the variable to a reference (pointer) to @a idxOtherVar.
9233 *
9234 * This does not require stack storage as we know the value and can always
9235 * reload it. Loading is postponed till needed.
9236 *
9237 * @param pReNative The recompiler state.
9238 * @param idxVar The variable. Unpacked.
9239 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
9240 *
9241 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9242 * @internal
9243 */
9244static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
9245{
9246 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
9247 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
9248
9249 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
9250 {
9251 /* Only simple transitions for now. */
9252 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
9253 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9254 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
9255 }
9256 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9257
9258 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
9259
9260 /* Update the other variable, ensure it's a stack variable. */
9261 /** @todo handle variables with const values... that'll go boom now. */
9262 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
9263 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9264}
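
/*
 * Illustrative MC usage this backs (a hypothetical snippet, not from any
 * particular instruction):
 *
 *      IEM_MC_LOCAL(uint16_t, u16Value);
 *      IEM_MC_ARG_LOCAL_REF(uint16_t *, pu16Value, u16Value, 1);
 *
 * The argument variable becomes a VarRef to u16Value, and u16Value is forced
 * to be a stack variable so its address can be materialized when the call is
 * actually made.
 */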
9265
9266
9267/**
9268 * Sets the variable to a reference (pointer) to a guest register reference.
9269 *
9270 * This does not require stack storage as we know the value and can always
9271 * reload it. Loading is postponed till needed.
9272 *
9273 * @param pReNative The recompiler state.
9274 * @param idxVar The variable.
9275 * @param enmRegClass The class guest registers to reference.
9276 * @param idxReg The register within @a enmRegClass to reference.
9277 *
9278 * @throws VERR_IEM_VAR_IPE_2
9279 */
9280static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9281 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
9282{
9283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9284 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9285
9286 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
9287 {
9288 /* Only simple transitions for now. */
9289 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9290 pVar->enmKind = kIemNativeVarKind_GstRegRef;
9291 }
9292 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9293
9294 pVar->u.GstRegRef.enmClass = enmRegClass;
9295 pVar->u.GstRegRef.idx = idxReg;
9296}
9297
9298
9299DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9300{
9301 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9302}
9303
9304
9305DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
9306{
9307 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9308
9309 /* Since we're using a generic uint64_t value type, we must truncate it if
9310 the variable is smaller, otherwise we may end up with too large a value when
9311 scaling up an imm8 w/ sign-extension.
9312
9313 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
9314 in the bios, bx=1) when running on arm, because clang expects 16-bit
9315 register parameters to have bits 16 and up set to zero. Instead of
9316 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
9317 CF value in the result. */
9318 switch (cbType)
9319 {
9320 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9321 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9322 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9323 }
9324 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9325 return idxVar;
9326}
9327
9328
9329DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
9330{
9331 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
9332 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
9333 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
9334 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
9335 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
9336 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9337
9338 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
9339 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
9340 return idxArgVar;
9341}
9342
9343
9344DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9345{
9346 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9347 /* Don't set to stack now, leave that to the first use as for instance
9348 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
9349 return idxVar;
9350}
9351
9352
9353DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
9354{
9355 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9356
9357 /* Since we're using a generic uint64_t value type, we must truncate it if
9358 the variable is smaller, otherwise we may end up with too large a value when
9359 scaling up an imm8 w/ sign-extension. */
9360 switch (cbType)
9361 {
9362 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9363 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9364 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9365 }
9366 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9367 return idxVar;
9368}
9369
9370
9371/**
9372 * Makes sure variable @a idxVar has a register assigned to it and that it stays
9373 * fixed till we call iemNativeVarRegisterRelease.
9374 *
9375 * @returns The host register number.
9376 * @param pReNative The recompiler state.
9377 * @param idxVar The variable.
9378 * @param poff Pointer to the instruction buffer offset.
9379 * In case a register needs to be freed up or the value
9380 * loaded off the stack.
9381 * @param fInitialized Set if the variable must already have been initialized.
9382 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
9383 * the case.
9384 * @param idxRegPref Preferred register number or UINT8_MAX.
9385 */
9386DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
9387 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
9388{
9389 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9390 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9391 Assert(pVar->cbVar <= 8);
9392 Assert(!pVar->fRegAcquired);
9393
9394 uint8_t idxReg = pVar->idxReg;
9395 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9396 {
9397 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
9398 && pVar->enmKind < kIemNativeVarKind_End);
9399 pVar->fRegAcquired = true;
9400 return idxReg;
9401 }
9402
9403 /*
9404 * If the kind of variable has not yet been set, default to 'stack'.
9405 */
9406 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
9407 && pVar->enmKind < kIemNativeVarKind_End);
9408 if (pVar->enmKind == kIemNativeVarKind_Invalid)
9409 iemNativeVarSetKindToStack(pReNative, idxVar);
9410
9411 /*
9412 * We have to allocate a register for the variable, even if it's a stack one,
9413 * as we don't know whether modifications are being made to it before it's
9414 * finalized (todo: analyze and insert hints about that?).
9415 *
9416 * If we can, we try to get the correct register for argument variables. This
9417 * assumes that most argument variables are fetched as close as possible
9418 * to the actual call, so that there aren't any interfering hidden calls
9419 * (memory accesses, etc.) in between.
9420 *
9421 * If we cannot, or it's not an argument variable, we make sure no argument
9422 * registers that will be used by this MC block are allocated here, and we
9423 * always prefer non-volatile registers to avoid needing to spill stuff for
9424 * internal calls.
9425 */
9426 /** @todo Detect too early argument value fetches and warn about hidden
9427 * calls causing less optimal code to be generated in the python script. */
9428
9429 uint8_t const uArgNo = pVar->uArgNo;
9430 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
9431 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
9432 {
9433 idxReg = g_aidxIemNativeCallRegs[uArgNo];
9434 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9435 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
9436 }
9437 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
9438 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
9439 {
9440 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
9441 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
9442 & ~pReNative->Core.bmHstRegsWithGstShadow
9443 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
9444 & fNotArgsMask;
9445 if (fRegs)
9446 {
9447 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
9448 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
9449 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
9450 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
9451 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
9452 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9453 }
9454 else
9455 {
9456 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
9457 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
9458 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
9459 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9460 }
9461 }
9462 else
9463 {
9464 idxReg = idxRegPref;
9465 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9466 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
9467 }
9468 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9469 pVar->idxReg = idxReg;
9470
9471 /*
9472 * Load it off the stack if we've got a stack slot.
9473 */
9474 uint8_t const idxStackSlot = pVar->idxStackSlot;
9475 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9476 {
9477 Assert(fInitialized);
9478 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9479 switch (pVar->cbVar)
9480 {
9481 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
9482 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
9483 case 3: AssertFailed(); RT_FALL_THRU();
9484 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
9485 default: AssertFailed(); RT_FALL_THRU();
9486 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
9487 }
9488 }
9489 else
9490 {
9491 Assert(idxStackSlot == UINT8_MAX);
9492 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9493 }
9494 pVar->fRegAcquired = true;
9495 return idxReg;
9496}
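
/*
 * Typical usage pattern in the emitters (a simplified sketch): acquire pins the
 * host register to the variable (fRegAcquired) until the matching release, so
 * intervening allocations won't steal or reshuffle it.
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true);
 *      ... emit code reading or updating idxVarReg ...
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 */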
9497
9498
9499/**
9500 * The value of variable @a idxVar will be written in full to the @a enmGstReg
9501 * guest register.
9502 *
9503 * This function makes sure there is a register for it and sets it to be the
9504 * current shadow copy of @a enmGstReg.
9505 *
9506 * @returns The host register number.
9507 * @param pReNative The recompiler state.
9508 * @param idxVar The variable.
9509 * @param enmGstReg The guest register this variable will be written to
9510 * after this call.
9511 * @param poff Pointer to the instruction buffer offset.
9512 * In case a register needs to be freed up or if the
9513 * variable content needs to be loaded off the stack.
9514 *
9515 * @note We DO NOT expect @a idxVar to be an argument variable, because this
9516 * function is only used in the commit stage of an instruction.
9518 */
9519DECL_HIDDEN_THROW(uint8_t)
9520iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
9521{
9522 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9523 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9524 Assert(!pVar->fRegAcquired);
9525 AssertMsgStmt( pVar->cbVar <= 8
9526 && ( pVar->enmKind == kIemNativeVarKind_Immediate
9527 || pVar->enmKind == kIemNativeVarKind_Stack),
9528 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
9529 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
9530 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9531
9532 /*
9533 * This shouldn't ever be used for arguments, unless it's in a weird else
9534 * branch that doesn't do any calling and even then it's questionable.
9535 *
9536 * However, in case someone writes crazy wrong MC code and does register
9537 * updates before making calls, just use the regular register allocator to
9538 * ensure we get a register suitable for the intended argument number.
9539 */
9540 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
9541
9542 /*
9543 * If there is already a register for the variable, we transfer/set the
9544 * guest shadow copy assignment to it.
9545 */
9546 uint8_t idxReg = pVar->idxReg;
9547 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9548 {
9549 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
9550 {
9551 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
9552 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
9553 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
9554 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
9555 }
9556 else
9557 {
9558 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
9559 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
9560 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
9561 }
9562 /** @todo figure this one out. We need some way of making sure the register isn't
9563 * modified after this point, just in case we start writing crappy MC code. */
9564 pVar->enmGstReg = enmGstReg;
9565 pVar->fRegAcquired = true;
9566 return idxReg;
9567 }
9568 Assert(pVar->uArgNo == UINT8_MAX);
9569
9570 /*
9571 * Because this is supposed to be the commit stage, we just tag along with the
9572 * temporary register allocator and upgrade the temporary register to a variable register.
9573 */
9574 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
9575 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
9576 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
9577 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
9578 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
9579 pVar->idxReg = idxReg;
9580
9581 /*
9582 * Now we need to load the register value.
9583 */
9584 if (pVar->enmKind == kIemNativeVarKind_Immediate)
9585 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
9586 else
9587 {
9588 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9589 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9590 switch (pVar->cbVar)
9591 {
9592 case sizeof(uint64_t):
9593 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
9594 break;
9595 case sizeof(uint32_t):
9596 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
9597 break;
9598 case sizeof(uint16_t):
9599 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
9600 break;
9601 case sizeof(uint8_t):
9602 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
9603 break;
9604 default:
9605 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9606 }
9607 }
9608
9609 pVar->fRegAcquired = true;
9610 return idxReg;
9611}
9612
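/*
 * Illustrative sketch only (hypothetical commit-stage fragment, not lifted from
 * an actual emitter; iGReg and idxValueVar are placeholders): how a full-write
 * emitter could use iemNativeVarRegisterAcquireForGuestReg() above.
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
 *                                                                       IEMNATIVEGSTREG_GPR(iGReg), &off);
 *      ... the variable's host register now shadows the guest GPR, so all that
 *          remains is emitting the full write of idxVarReg back to CPUMCTX ...
 *      iemNativeVarRegisterRelease(pReNative, idxValueVar);
 */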
9613
9614/**
9615 * Sets the host register for @a idxVarRc to @a idxReg.
9616 *
9617 * The register must not be allocated. Any guest register shadowing will be
9618 * implicitly dropped by this call.
9619 *
9620 * The variable must not have any register associated with it (causes
9621 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
9622 * implied.
9623 *
9624 * @returns idxReg
9625 * @param pReNative The recompiler state.
9626 * @param idxVar The variable.
9627 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
9628 * @param off For recording in debug info.
9629 *
9630 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
9631 */
9632DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
9633{
9634 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9635 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9636 Assert(!pVar->fRegAcquired);
9637 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
9638 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
9639 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
9640
9641 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
9642 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9643
9644 iemNativeVarSetKindToStack(pReNative, idxVar);
9645 pVar->idxReg = idxReg;
9646
9647 return idxReg;
9648}
9649
9650
9651/**
9652 * Convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
9653 */
9654DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9655 uint8_t idxReg, uint32_t *poff)
9656{
9657 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
9658 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
9659 return idxReg;
9660}
9661
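/*
 * Illustrative sketch only (hypothetical; assumes a helper call just returned
 * its status in IEMNATIVE_CALL_RET_GREG and idxVarRc has no register yet):
 *
 *      iemNativeVarRegisterSetAndAcquire(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, &off);
 *      ... use the result via the acquired register ...
 *      iemNativeVarRegisterRelease(pReNative, idxVarRc);
 */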
9662
9663/**
9664 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
9665 *
9666 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
9667 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
9668 * requirement of flushing anything in volatile host registers when making a
9669 * call.
9670 *
9671 * @returns New @a off value.
9672 * @param pReNative The recompiler state.
9673 * @param off The code buffer position.
9674 * @param fHstRegsNotToSave Set of registers not to save & restore.
9675 */
9676DECL_HIDDEN_THROW(uint32_t)
9677iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9678{
9679 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9680 if (fHstRegs)
9681 {
9682 do
9683 {
9684 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9685 fHstRegs &= ~RT_BIT_32(idxHstReg);
9686
9687 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9688 {
9689 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9690 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9691 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9692 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9693 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9694 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9695 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9696 {
9697 case kIemNativeVarKind_Stack:
9698 {
9699 /* Temporarily spill the variable register. */
9700 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9701 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9702 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9703 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9704 continue;
9705 }
9706
9707 case kIemNativeVarKind_Immediate:
9708 case kIemNativeVarKind_VarRef:
9709 case kIemNativeVarKind_GstRegRef:
9710 /* It is weird to have any of these loaded at this point. */
9711 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9712 continue;
9713
9714 case kIemNativeVarKind_End:
9715 case kIemNativeVarKind_Invalid:
9716 break;
9717 }
9718 AssertFailed();
9719 }
9720 else
9721 {
9722 /*
9723 * Allocate a temporary stack slot and spill the register to it.
9724 */
9725 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9726 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
9727 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9728 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
9729 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
9730 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9731 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9732 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9733 }
9734 } while (fHstRegs);
9735 }
9736 return off;
9737}
9738
9739
9740/**
9741 * Emit code to restore volatile registers after a call to a helper (TLB miss).
9742 *
9743 * @returns New @a off value.
9744 * @param pReNative The recompiler state.
9745 * @param off The code buffer position.
9746 * @param fHstRegsNotToSave Set of registers not to save & restore.
9747 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
9748 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
9749 */
9750DECL_HIDDEN_THROW(uint32_t)
9751iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9752{
9753 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9754 if (fHstRegs)
9755 {
9756 do
9757 {
9758 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9759 fHstRegs &= ~RT_BIT_32(idxHstReg);
9760
9761 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9762 {
9763 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9764 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9765 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9766 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9767 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9768 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9769 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9770 {
9771 case kIemNativeVarKind_Stack:
9772 {
9773 /* Unspill the variable register. */
9774 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9775 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
9776 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9777 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9778 continue;
9779 }
9780
9781 case kIemNativeVarKind_Immediate:
9782 case kIemNativeVarKind_VarRef:
9783 case kIemNativeVarKind_GstRegRef:
9784 /* It is weird to have any of these loaded at this point. */
9785 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9786 continue;
9787
9788 case kIemNativeVarKind_End:
9789 case kIemNativeVarKind_Invalid:
9790 break;
9791 }
9792 AssertFailed();
9793 }
9794 else
9795 {
9796 /*
9797 * Restore from temporary stack slot.
9798 */
9799 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
9800 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
9801 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
9802 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
9803
9804 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9805 }
9806 } while (fHstRegs);
9807 }
9808 return off;
9809}
9810
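/*
 * Illustrative sketch only (hypothetical TLB-miss slow path; fHstRegsNotToSave
 * and pfnHelper are placeholders): how the save/restore pair above brackets a
 * helper call without flushing everything.
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      ... load the helper arguments into the IEMNATIVE_CALL_ARGx_GREG registers ...
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */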
9811
9812/**
9813 * Worker that frees the stack slots for variable @a idxVar if any allocated.
9814 *
9815 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
9816 *
9817 * ASSUMES that @a idxVar is valid and unpacked.
9818 */
9819DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9820{
9821 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
9822 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
9823 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9824 {
9825 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
9826 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
9827 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
9828 Assert(cSlots > 0);
9829 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
9830 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9831 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
9832 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
9833 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9834 }
9835 else
9836 Assert(idxStackSlot == UINT8_MAX);
9837}
9838
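/*
 * Worked example of the mask math in iemNativeVarFreeStackSlots above, assuming
 * a hypothetical 16 byte variable sitting in slot 4: cSlots = (16 + 7) / 8 = 2,
 * fAllocMask = RT_BIT_32(2) - 1 = 0x3, so the bits cleared from bmStack are
 * 0x3 << 4 = 0x30, i.e. slots 4 and 5.
 */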
9839
9840/**
9841 * Worker that frees a single variable.
9842 *
9843 * ASSUMES that @a idxVar is valid and unpacked.
9844 */
9845DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9846{
9847 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
9848 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
9849 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
9850
9851 /* Free the host register first if any assigned. */
9852 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9853 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9854 {
9855 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9856 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9857 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9858 }
9859
9860 /* Free argument mapping. */
9861 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
9862 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
9863 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
9864
9865 /* Free the stack slots. */
9866 iemNativeVarFreeStackSlots(pReNative, idxVar);
9867
9868 /* Free the actual variable. */
9869 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9870 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9871}
9872
9873
9874/**
9875 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
9876 */
9877DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
9878{
9879 while (bmVars != 0)
9880 {
9881 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9882 bmVars &= ~RT_BIT_32(idxVar);
9883
9884#if 1 /** @todo optimize by simplifying this later... */
9885 iemNativeVarFreeOneWorker(pReNative, idxVar);
9886#else
9887 /* Only need to free the host register, the rest is done as bulk updates below. */
9888 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9889 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9890 {
9891 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9892 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9893 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9894 }
9895#endif
9896 }
9897#if 0 /** @todo optimize by simplifying this later... */
9898 pReNative->Core.bmVars = 0;
9899 pReNative->Core.bmStack = 0;
9900 pReNative->Core.u64ArgVars = UINT64_MAX;
9901#endif
9902}
9903
9904
9905/**
9906 * This is called by IEM_MC_END() to clean up all variables.
9907 */
9908DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
9909{
9910 uint32_t const bmVars = pReNative->Core.bmVars;
9911 if (bmVars != 0)
9912 iemNativeVarFreeAllSlow(pReNative, bmVars);
9913 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9914 Assert(pReNative->Core.bmStack == 0);
9915}
9916
9917
9918#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
9919
9920/**
9921 * This is called by IEM_MC_FREE_LOCAL.
9922 */
9923DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9924{
9925 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9926 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
9927 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9928}
9929
9930
9931#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
9932
9933/**
9934 * This is called by IEM_MC_FREE_ARG.
9935 */
9936DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9937{
9938 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9939 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
9940 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9941}
9942
9943
9944#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
9945
9946/**
9947 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
9948 */
9949DECL_INLINE_THROW(uint32_t)
9950iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
9951{
9952 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
9953 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
9954 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9955 Assert( pVarDst->cbVar == sizeof(uint16_t)
9956 || pVarDst->cbVar == sizeof(uint32_t));
9957
9958 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
9959 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
9960 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
9961 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
9962 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9963
9964 Assert(pVarDst->cbVar < pVarSrc->cbVar);
9965
9966 /*
9967 * Special case for immediates.
9968 */
9969 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
9970 {
9971 switch (pVarDst->cbVar)
9972 {
9973 case sizeof(uint16_t):
9974 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
9975 break;
9976 case sizeof(uint32_t):
9977 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
9978 break;
9979 default: AssertFailed(); break;
9980 }
9981 }
9982 else
9983 {
9984 /*
9985 * The generic solution for now.
9986 */
9987 /** @todo optimize this by having the python script make sure the source
9988 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
9989 * statement. Then we could just transfer the register assignments. */
9990 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
9991 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
9992 switch (pVarDst->cbVar)
9993 {
9994 case sizeof(uint16_t):
9995 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
9996 break;
9997 case sizeof(uint32_t):
9998 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
9999 break;
10000 default: AssertFailed(); break;
10001 }
10002 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
10003 iemNativeVarRegisterRelease(pReNative, idxVarDst);
10004 }
10005 return off;
10006}
10007
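/*
 * Illustrative sketch only (hypothetical MC fragment, not taken from the
 * decoder; the local names are made up): a typical IEM_MC_ASSIGN_TO_SMALLER
 * use, narrowing a 32-bit source into a 16-bit destination.
 *
 *      IEM_MC_LOCAL(uint16_t, u16Dst);
 *      IEM_MC_ASSIGN_TO_SMALLER(u16Dst, u32Src);
 *
 * If u32Src is an immediate the recompiler constant-folds it; otherwise it
 * emits a single 16-bit register-to-register copy as per the switch above.
 */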
10008
10009
10010/*********************************************************************************************************************************
10011* Emitters for IEM_MC_CALL_CIMPL_XXX *
10012*********************************************************************************************************************************/
10013
10014/**
10015 * Emits code to load a reference to the given guest register into @a idxGprDst.
10016 */
10017DECL_INLINE_THROW(uint32_t)
10018iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
10019 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
10020{
10021#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
10022 /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
10023#endif
10024
10025 /*
10026 * Get the offset relative to the CPUMCTX structure.
10027 */
10028 uint32_t offCpumCtx;
10029 switch (enmClass)
10030 {
10031 case kIemNativeGstRegRef_Gpr:
10032 Assert(idxRegInClass < 16);
10033 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
10034 break;
10035
10036 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
10037 Assert(idxRegInClass < 4);
10038 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
10039 break;
10040
10041 case kIemNativeGstRegRef_EFlags:
10042 Assert(idxRegInClass == 0);
10043 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
10044 break;
10045
10046 case kIemNativeGstRegRef_MxCsr:
10047 Assert(idxRegInClass == 0);
10048 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
10049 break;
10050
10051 case kIemNativeGstRegRef_FpuReg:
10052 Assert(idxRegInClass < 8);
10053 AssertFailed(); /** @todo what kind of indexing? */
10054 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
10055 break;
10056
10057 case kIemNativeGstRegRef_MReg:
10058 Assert(idxRegInClass < 8);
10059 AssertFailed(); /** @todo what kind of indexing? */
10060 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
10061 break;
10062
10063 case kIemNativeGstRegRef_XReg:
10064 Assert(idxRegInClass < 16);
10065 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
10066 break;
10067
10068 default:
10069 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
10070 }
10071
10072 /*
10073 * Load the value into the destination register.
10074 */
10075#ifdef RT_ARCH_AMD64
10076 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
10077
10078#elif defined(RT_ARCH_ARM64)
10079 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10080 Assert(offCpumCtx < 4096);
10081 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
10082
10083#else
10084# error "Port me!"
10085#endif
10086
10087 return off;
10088}
10089
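/*
 * Sketch of the effective address computed above (assuming the fixed
 * PVMCPU/PCPUMCTX register conventions; pseudo assembly, illustration only):
 *
 *      AMD64:  lea  idxGprDst, [pVCpu + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx) + offCpumCtx]
 *      ARM64:  add  idxGprDst, <PCPUMCTX register>, #offCpumCtx
 *
 * Either way the destination register ends up pointing straight at the member
 * inside the guest CPUMCTX, which is what by-reference arguments expect.
 */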
10090
10091/**
10092 * Common code for CIMPL and AIMPL calls.
10093 *
10094 * These are calls that use argument variables and such. They should not be
10095 * confused with internal calls required to implement an MC operation,
10096 * like a TLB load and similar.
10097 *
10098 * Upon return all that is left to do is to load any hidden arguments and
10099 * perform the call. All argument variables are freed.
10100 *
10101 * @returns New code buffer offset; throws VBox status code on error.
10102 * @param pReNative The native recompile state.
10103 * @param off The code buffer offset.
10104 * @param cArgs The total number of arguments (including the
10105 * hidden ones).
10106 * @param cHiddenArgs The number of hidden arguments. The hidden
10107 * arguments must not have any variable declared for
10108 * them, whereas all the regular arguments must
10109 * (tstIEMCheckMc ensures this).
10110 */
10111DECL_HIDDEN_THROW(uint32_t)
10112iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
10113{
10114#ifdef VBOX_STRICT
10115 /*
10116 * Assert sanity.
10117 */
10118 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
10119 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
10120 for (unsigned i = 0; i < cHiddenArgs; i++)
10121 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
10122 for (unsigned i = cHiddenArgs; i < cArgs; i++)
10123 {
10124 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
10125 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
10126 }
10127 iemNativeRegAssertSanity(pReNative);
10128#endif
10129
10130 /* We don't know what the called function makes use of, so flush any pending register writes. */
10131 off = iemNativeRegFlushPendingWrites(pReNative, off);
10132
10133 /*
10134 * Before we do anything else, go over variables that are referenced and
10135 * make sure they are not in a register.
10136 */
10137 uint32_t bmVars = pReNative->Core.bmVars;
10138 if (bmVars)
10139 {
10140 do
10141 {
10142 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
10143 bmVars &= ~RT_BIT_32(idxVar);
10144
10145 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
10146 {
10147 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
10148 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
10149 {
10150 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
10151 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
10152 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
10153 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
10154 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
10155
10156 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
10157 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
10158 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
10159 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
10160 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
10161 }
10162 }
10163 } while (bmVars != 0);
10164#if 0 //def VBOX_STRICT
10165 iemNativeRegAssertSanity(pReNative);
10166#endif
10167 }
10168
10169 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
10170
10171 /*
10172 * First, go over the host registers that will be used for arguments and make
10173 * sure they either hold the desired argument or are free.
10174 */
10175 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
10176 {
10177 for (uint32_t i = 0; i < cRegArgs; i++)
10178 {
10179 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10180 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10181 {
10182 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
10183 {
10184 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
10185 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
10186 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
10187 Assert(pVar->idxReg == idxArgReg);
10188 uint8_t const uArgNo = pVar->uArgNo;
10189 if (uArgNo == i)
10190 { /* perfect */ }
10191 /* The variable allocator logic should make sure this is impossible,
10192 except for when the return register is used as a parameter (ARM,
10193 but not x86). */
10194#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
10195 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
10196 {
10197# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10198# error "Implement this"
10199# endif
10200 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
10201 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
10202 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
10203 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10204 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
10205 }
10206#endif
10207 else
10208 {
10209 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10210
10211 if (pVar->enmKind == kIemNativeVarKind_Stack)
10212 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
10213 else
10214 {
10215 /* just free it, can be reloaded if used again */
10216 pVar->idxReg = UINT8_MAX;
10217 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
10218 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
10219 }
10220 }
10221 }
10222 else
10223 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
10224 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
10225 }
10226 }
10227#if 0 //def VBOX_STRICT
10228 iemNativeRegAssertSanity(pReNative);
10229#endif
10230 }
10231
10232 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
10233
10234#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10235 /*
10236 * If there are any stack arguments, make sure they are in their place as well.
10237 *
10238 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
10239 * the caller) will be loading it later and it must be free (see the first loop).
10240 */
10241 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
10242 {
10243 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
10244 {
10245 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10246 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
10247 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10248 {
10249 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
10250 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
10251 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
10252 pVar->idxReg = UINT8_MAX;
10253 }
10254 else
10255 {
10256 /* Use ARG0 as temp for stuff we need registers for. */
10257 switch (pVar->enmKind)
10258 {
10259 case kIemNativeVarKind_Stack:
10260 {
10261 uint8_t const idxStackSlot = pVar->idxStackSlot;
10262 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10263 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
10264 iemNativeStackCalcBpDisp(idxStackSlot));
10265 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10266 continue;
10267 }
10268
10269 case kIemNativeVarKind_Immediate:
10270 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
10271 continue;
10272
10273 case kIemNativeVarKind_VarRef:
10274 {
10275 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10276 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10277 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10278 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10279 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10280 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10281 {
10282 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10283 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10284 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10285 }
10286 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10287 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10288 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
10289 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10290 continue;
10291 }
10292
10293 case kIemNativeVarKind_GstRegRef:
10294 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
10295 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10296 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10297 continue;
10298
10299 case kIemNativeVarKind_Invalid:
10300 case kIemNativeVarKind_End:
10301 break;
10302 }
10303 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10304 }
10305 }
10306# if 0 //def VBOX_STRICT
10307 iemNativeRegAssertSanity(pReNative);
10308# endif
10309 }
10310#else
10311 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
10312#endif
10313
10314 /*
10315 * Make sure the argument variables are loaded into their respective registers.
10316 *
10317 * We can optimize this by ASSUMING that any register allocations are for
10318 * registers that have already been loaded and are ready. The previous step
10319 * saw to that.
10320 */
10321 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
10322 {
10323 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10324 {
10325 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10326 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10327 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
10328 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
10329 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
10330 else
10331 {
10332 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10333 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10334 {
10335 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
10336 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
10337 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
10338 | RT_BIT_32(idxArgReg);
10339 pVar->idxReg = idxArgReg;
10340 }
10341 else
10342 {
10343 /* Use ARG0 as temp for stuff we need registers for. */
10344 switch (pVar->enmKind)
10345 {
10346 case kIemNativeVarKind_Stack:
10347 {
10348 uint8_t const idxStackSlot = pVar->idxStackSlot;
10349 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10350 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
10351 continue;
10352 }
10353
10354 case kIemNativeVarKind_Immediate:
10355 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
10356 continue;
10357
10358 case kIemNativeVarKind_VarRef:
10359 {
10360 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10361 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10362 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
10363 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10364 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10365 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10366 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10367 {
10368 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10369 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10370 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10371 }
10372 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10373 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10374 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
10375 continue;
10376 }
10377
10378 case kIemNativeVarKind_GstRegRef:
10379 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
10380 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10381 continue;
10382
10383 case kIemNativeVarKind_Invalid:
10384 case kIemNativeVarKind_End:
10385 break;
10386 }
10387 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10388 }
10389 }
10390 }
10391#if 0 //def VBOX_STRICT
10392 iemNativeRegAssertSanity(pReNative);
10393#endif
10394 }
10395#ifdef VBOX_STRICT
10396 else
10397 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10398 {
10399 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
10400 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
10401 }
10402#endif
10403
10404 /*
10405 * Free all argument variables (simplified).
10406 * Their lifetime always expires with the call they are for.
10407 */
10408 /** @todo Make the python script check that arguments aren't used after
10409 * IEM_MC_CALL_XXXX. */
10410 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends
10411 * requiring an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call,
10412 * typically with an argument value. There is also some FPU stuff. */
10413 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
10414 {
10415 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
10416 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
10417
10418 /* no need to free registers: */
10419 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
10420 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
10421 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
10422 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
10423 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
10424 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
10425
10426 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
10427 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
10428 iemNativeVarFreeStackSlots(pReNative, idxVar);
10429 }
10430 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
10431
10432 /*
10433 * Flush volatile registers as we make the call.
10434 */
10435 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
10436
10437 return off;
10438}
10439
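/*
 * Illustrative caller pattern (condensed outline with placeholder pfn and
 * counts; the CIMPL and AIMPL workers below are the real instances):
 *
 *      off = iemNativeEmitCallCommon(pReNative, off, cArgsTotal, cHiddenArgs);
 *      ... load any hidden arguments into IEMNATIVE_CALL_ARG0/1_GREG ...
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfn);
 */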
10440
10441/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
10442DECL_HIDDEN_THROW(uint32_t)
10443iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
10444 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
10445
10446{
10447 /*
10448 * Do all the call setup and cleanup.
10449 */
10450 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
10451
10452 /*
10453 * Load the two or three hidden arguments.
10454 */
10455#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10456 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
10457 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10458 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
10459#else
10460 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10461 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
10462#endif
10463
10464 /*
10465 * Make the call and check the return code.
10466 *
10467 * Shadow PC copies are always flushed here, other stuff depends on flags.
10468 * Segment and general purpose registers are explicitly flushed via the
10469 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
10470 * macros.
10471 */
10472 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
10473#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10474 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
10475#endif
10476 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
10477 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
10478 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
10479 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
10480
10481 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
10482}
10483
10484
10485#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
10486 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
10487
10488/** Emits code for IEM_MC_CALL_CIMPL_1. */
10489DECL_INLINE_THROW(uint32_t)
10490iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10491 uintptr_t pfnCImpl, uint8_t idxArg0)
10492{
10493 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10494 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
10495}
10496
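/*
 * Illustrative expansion, derived from the IEM_MC_CALL_CIMPL_1_THREADED
 * #define above (a_cbInstr, a_pfnCImpl and a0 are the caller's placeholders):
 *
 *      IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0)
 *          => off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr,
 *                                           a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0);
 */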
10497
10498#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
10499 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
10500
10501/** Emits code for IEM_MC_CALL_CIMPL_2. */
10502DECL_INLINE_THROW(uint32_t)
10503iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10504 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
10505{
10506 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10507 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10508 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
10509}
10510
10511
10512#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
10513 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10514 (uintptr_t)a_pfnCImpl, a0, a1, a2)
10515
10516/** Emits code for IEM_MC_CALL_CIMPL_3. */
10517DECL_INLINE_THROW(uint32_t)
10518iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10519 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10520{
10521 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10522 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10523 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10524 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
10525}
10526
10527
10528#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
10529 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10530 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
10531
10532/** Emits code for IEM_MC_CALL_CIMPL_4. */
10533DECL_INLINE_THROW(uint32_t)
10534iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10535 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10536{
10537 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10538 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10539 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10540 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10541 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
10542}
10543
10544
10545#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
10546 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10547 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
10548
10549 /** Emits code for IEM_MC_CALL_CIMPL_5. */
10550DECL_INLINE_THROW(uint32_t)
10551iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10552 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
10553{
10554 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10555 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10556 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10557 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10558 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
10559 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
10560}
10561
10562
10563/** Recompiler debugging: Flush guest register shadow copies. */
10564#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
10565
10566
10567
10568/*********************************************************************************************************************************
10569* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
10570*********************************************************************************************************************************/
10571
10572/**
10573 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
10574 */
10575DECL_INLINE_THROW(uint32_t)
10576iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10577 uintptr_t pfnAImpl, uint8_t cArgs)
10578{
10579 if (idxVarRc != UINT8_MAX)
10580 {
10581 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
10582 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
10583 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
10584 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
10585 }
10586
10587 /*
10588 * Do all the call setup and cleanup.
10589 */
10590 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
10591
10592 /*
10593 * Make the call and update the return code variable if we've got one.
10594 */
10595 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10596 if (idxVarRc != UINT8_MAX)
10597 {
10598off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
10599 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
10600 }
10601
10602 return off;
10603}
10604
10605
10606
10607#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
10608 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
10609
10610#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
10611 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
10612
10613/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
10614DECL_INLINE_THROW(uint32_t)
10615iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
10616{
10617 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
10618}
10619
10620
10621#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
10622 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
10623
10624#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
10625 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
10626
10627/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
10628DECL_INLINE_THROW(uint32_t)
10629iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
10630{
10631 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10632 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
10633}
10634
10635
10636#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
10637 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
10638
10639#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
10640 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
10641
10642/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
10643DECL_INLINE_THROW(uint32_t)
10644iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10645 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10646{
10647 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10648 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10649 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
10650}
10651
10652
10653#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
10654 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
10655
10656#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
10657 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
10658
10659/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
10660DECL_INLINE_THROW(uint32_t)
10661iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10662 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10663{
10664 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10665 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10666 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10667 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
10668}
10669
10670
10671#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
10672 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10673
10674#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
10675 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10676
10677/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
10678DECL_INLINE_THROW(uint32_t)
10679iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10680 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10681{
10682 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10683 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10684 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10685 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
10686 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
10687}
10688
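/*
 * Illustrative sketch only (hypothetical MC fragment with placeholder names):
 * how the AIMPL macros above are meant to be used from a threaded function,
 * without and with a return value.
 *
 *      IEM_MC_CALL_VOID_AIMPL_2(pfnWorker, pDst, uSrc);
 *      IEM_MC_CALL_AIMPL_2(rcVar, pfnWorker, pDst, uSrc);
 *
 * Both funnel into iemNativeEmitCallAImplCommon above; only the latter binds
 * IEMNATIVE_CALL_RET_GREG to the result variable after the call.
 */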
10689
10690
10691/*********************************************************************************************************************************
10692* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
10693*********************************************************************************************************************************/
10694
10695#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
10696 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
10697
10698#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10699 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
10700
10701#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10702 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
10703
10704#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10705 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
10706
10707
10708/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
10709 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
10710DECL_INLINE_THROW(uint32_t)
10711iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
10712{
10713 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10714 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10715 Assert(iGRegEx < 20);
10716
10717 /* Same discussion as in iemNativeEmitFetchGregU16 */
10718 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10719 kIemNativeGstRegUse_ReadOnly);
10720
10721 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10722 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10723
10724 /* The value is zero-extended to the full 64-bit host register width. */
10725 if (iGRegEx < 16)
10726 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10727 else
10728 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10729
10730 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10731 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10732 return off;
10733}
10734
10735
10736#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10737 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
10738
10739#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10740 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
10741
10742#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10743 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
10744
10745/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
10746DECL_INLINE_THROW(uint32_t)
10747iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
10748{
10749 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10750 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10751 Assert(iGRegEx < 20);
10752
10753 /* Same discussion as in iemNativeEmitFetchGregU16 */
10754 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10755 kIemNativeGstRegUse_ReadOnly);
10756
10757 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10758 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10759
10760 if (iGRegEx < 16)
10761 {
10762 switch (cbSignExtended)
10763 {
10764 case sizeof(uint16_t):
10765 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10766 break;
10767 case sizeof(uint32_t):
10768 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10769 break;
10770 case sizeof(uint64_t):
10771 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10772 break;
10773 default: AssertFailed(); break;
10774 }
10775 }
10776 else
10777 {
10778 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10779 switch (cbSignExtended)
10780 {
10781 case sizeof(uint16_t):
10782 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10783 break;
10784 case sizeof(uint32_t):
10785 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10786 break;
10787 case sizeof(uint64_t):
10788 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10789 break;
10790 default: AssertFailed(); break;
10791 }
10792 }
10793
10794 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10795 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10796 return off;
10797}
10798
10799
10800
10801#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
10802 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
10803
10804#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
10805 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10806
10807#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
10808 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10809
10810/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
10811DECL_INLINE_THROW(uint32_t)
10812iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10813{
10814 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10815 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10816 Assert(iGReg < 16);
10817
10818 /*
10819 * We can either just load the low 16-bit of the GPR into a host register
10820 * for the variable, or we can do so via a shadow copy host register. The
10821 * latter will avoid having to reload it if it's being stored later, but
10822 * will waste a host register if it isn't touched again. Since we don't
10823     * know what's going to happen, we choose the latter for now.
10824 */
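    /* Sketch of the trade-off (added for illustration, not in the original source):
       the direct route would be a single 16-bit load straight from the guest context,
       i.e. something like "movzx dst32, word [pVCpu + offsetof(cpum.GstCtx.aGRegs[iGReg])]",
       whereas the shadow-copy route below first materialises the whole 64-bit GPR in
       a host register (which later code may reuse) and then zero extends reg-to-reg. */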
10825 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10826 kIemNativeGstRegUse_ReadOnly);
10827
10828 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10829 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10830 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10831 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10832
10833 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10834 return off;
10835}
10836
10837
10838#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
10839 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10840
10841#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
10842 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10843
10844/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
10845DECL_INLINE_THROW(uint32_t)
10846iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
10847{
10848 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10849 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10850 Assert(iGReg < 16);
10851
10852 /*
10853 * We can either just load the low 16-bit of the GPR into a host register
10854 * for the variable, or we can do so via a shadow copy host register. The
10855 * latter will avoid having to reload it if it's being stored later, but
10856 * will waste a host register if it isn't touched again. Since we don't
10857     * know what's going to happen, we choose the latter for now.
10858 */
10859 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10860 kIemNativeGstRegUse_ReadOnly);
10861
10862 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10863 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10864 if (cbSignExtended == sizeof(uint32_t))
10865 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10866 else
10867 {
10868 Assert(cbSignExtended == sizeof(uint64_t));
10869 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10870 }
10871 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10872
10873 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10874 return off;
10875}
10876
10877
10878#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
10879 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
10880
10881#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
10882 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
10883
10884/** Emits code for IEM_MC_FETCH_GREG_U32. */
10885DECL_INLINE_THROW(uint32_t)
10886iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10887{
10888 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10889 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10890 Assert(iGReg < 16);
10891
10892 /*
10893     * We can either just load the low 32-bit of the GPR into a host register
10894 * for the variable, or we can do so via a shadow copy host register. The
10895 * latter will avoid having to reload it if it's being stored later, but
10896 * will waste a host register if it isn't touched again. Since we don't
10897     * know what's going to happen, we choose the latter for now.
10898 */
10899 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10900 kIemNativeGstRegUse_ReadOnly);
10901
10902 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10903 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10904 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10905 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10906
10907 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10908 return off;
10909}
10910
10911
10912#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
10913 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
10914
10915/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
10916DECL_INLINE_THROW(uint32_t)
10917iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10918{
10919 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10920 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10921 Assert(iGReg < 16);
10922
10923 /*
10924 * We can either just load the low 32-bit of the GPR into a host register
10925 * for the variable, or we can do so via a shadow copy host register. The
10926 * latter will avoid having to reload it if it's being stored later, but
10927 * will waste a host register if it isn't touched again. Since we don't
10928     * know what's going to happen, we choose the latter for now.
10929 */
10930 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10931 kIemNativeGstRegUse_ReadOnly);
10932
10933 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10934 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10935 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10936 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10937
10938 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10939 return off;
10940}
10941
10942
10943#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
10944 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10945
10946#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
10947 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10948
10949/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
10950 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
10951DECL_INLINE_THROW(uint32_t)
10952iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10953{
10954 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10955 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10956 Assert(iGReg < 16);
10957
10958 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10959 kIemNativeGstRegUse_ReadOnly);
10960
10961 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10962 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10963 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
10964 /** @todo name the register a shadow one already? */
10965 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10966
10967 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10968 return off;
10969}
10970
10971
10972
10973/*********************************************************************************************************************************
10974* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
10975*********************************************************************************************************************************/
10976
10977#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
10978 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
10979
10980/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
10981DECL_INLINE_THROW(uint32_t)
10982iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
10983{
10984 Assert(iGRegEx < 20);
10985 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10986 kIemNativeGstRegUse_ForUpdate);
10987#ifdef RT_ARCH_AMD64
10988 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
10989
10990 /* To the lowest byte of the register: mov r8, imm8 */
10991 if (iGRegEx < 16)
10992 {
10993 if (idxGstTmpReg >= 8)
10994 pbCodeBuf[off++] = X86_OP_REX_B;
10995 else if (idxGstTmpReg >= 4)
10996 pbCodeBuf[off++] = X86_OP_REX;
10997 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
10998 pbCodeBuf[off++] = u8Value;
10999 }
11000    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
11001 else if (idxGstTmpReg < 4)
11002 {
11003 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
11004 pbCodeBuf[off++] = u8Value;
11005 }
11006 else
11007 {
11008 /* ror reg64, 8 */
11009 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11010 pbCodeBuf[off++] = 0xc1;
11011 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11012 pbCodeBuf[off++] = 8;
11013
11014 /* mov reg8, imm8 */
11015 if (idxGstTmpReg >= 8)
11016 pbCodeBuf[off++] = X86_OP_REX_B;
11017 else if (idxGstTmpReg >= 4)
11018 pbCodeBuf[off++] = X86_OP_REX;
11019 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
11020 pbCodeBuf[off++] = u8Value;
11021
11022 /* rol reg64, 8 */
11023 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11024 pbCodeBuf[off++] = 0xc1;
11025 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11026 pbCodeBuf[off++] = 8;
11027 }
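    /* Example (illustrative only, not from the original source): storing an immediate
       into CH while guest RCX is shadowed in host r9 cannot use an AH/CH/DH/BH style
       encoding (those require one of the first four host registers and no REX prefix),
       so the fallback above produces roughly:
            ror r9, 8
            mov r9b, imm8
            rol r9, 8
       i.e. the target byte is rotated down into the low byte, overwritten and rotated
       back into place. */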
11028
11029#elif defined(RT_ARCH_ARM64)
11030 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
11031 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11032 if (iGRegEx < 16)
11033 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
11034 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
11035 else
11036 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
11037 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
11038 iemNativeRegFreeTmp(pReNative, idxImmReg);
11039
11040#else
11041# error "Port me!"
11042#endif
11043
11044 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11045
11046 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11047
11048 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11049 return off;
11050}
11051
11052
11053#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
11054 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
11055
11056/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
11057DECL_INLINE_THROW(uint32_t)
11058iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
11059{
11060 Assert(iGRegEx < 20);
11061 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11062
11063 /*
11064 * If it's a constant value (unlikely) we treat this as a
11065 * IEM_MC_STORE_GREG_U8_CONST statement.
11066 */
11067 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11068 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11069 { /* likely */ }
11070 else
11071 {
11072 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11073 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11074 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
11075 }
11076
11077 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
11078 kIemNativeGstRegUse_ForUpdate);
11079 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11080
11081#ifdef RT_ARCH_AMD64
11082 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
11083 if (iGRegEx < 16)
11084 {
11085 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
11086 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11087 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11088 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11089 pbCodeBuf[off++] = X86_OP_REX;
11090 pbCodeBuf[off++] = 0x8a;
11091 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11092 }
11093    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
11094 else if (idxGstTmpReg < 4 && idxVarReg < 4)
11095 {
11096 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
11097 pbCodeBuf[off++] = 0x8a;
11098 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
11099 }
11100 else
11101 {
11102 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
11103
11104 /* ror reg64, 8 */
11105 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11106 pbCodeBuf[off++] = 0xc1;
11107 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11108 pbCodeBuf[off++] = 8;
11109
11110 /* mov reg8, reg8(r/m) */
11111 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11112 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11113 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11114 pbCodeBuf[off++] = X86_OP_REX;
11115 pbCodeBuf[off++] = 0x8a;
11116 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11117
11118 /* rol reg64, 8 */
11119 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11120 pbCodeBuf[off++] = 0xc1;
11121 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11122 pbCodeBuf[off++] = 8;
11123 }
11124
11125#elif defined(RT_ARCH_ARM64)
11126 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
11127 or
11128 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
11129 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11130 if (iGRegEx < 16)
11131 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
11132 else
11133 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
11134
11135#else
11136# error "Port me!"
11137#endif
11138 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11139
11140 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11141
11142 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11143 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11144 return off;
11145}
11146
11147
11148
11149#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
11150 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
11151
11152/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
11153DECL_INLINE_THROW(uint32_t)
11154iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
11155{
11156 Assert(iGReg < 16);
11157 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11158 kIemNativeGstRegUse_ForUpdate);
11159#ifdef RT_ARCH_AMD64
11160 /* mov reg16, imm16 */
11161 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
11162 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11163 if (idxGstTmpReg >= 8)
11164 pbCodeBuf[off++] = X86_OP_REX_B;
11165 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
11166 pbCodeBuf[off++] = RT_BYTE1(uValue);
11167 pbCodeBuf[off++] = RT_BYTE2(uValue);
11168
11169#elif defined(RT_ARCH_ARM64)
11170 /* movk xdst, #uValue, lsl #0 */
11171 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11172 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
11173
11174#else
11175# error "Port me!"
11176#endif
11177
11178 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11179
11180 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11181 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11182 return off;
11183}
11184
11185
11186#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
11187 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
11188
11189/** Emits code for IEM_MC_STORE_GREG_U16. */
11190DECL_INLINE_THROW(uint32_t)
11191iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11192{
11193 Assert(iGReg < 16);
11194 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11195
11196 /*
11197 * If it's a constant value (unlikely) we treat this as a
11198 * IEM_MC_STORE_GREG_U16_CONST statement.
11199 */
11200 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11201 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11202 { /* likely */ }
11203 else
11204 {
11205 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11206 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11207 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
11208 }
11209
11210 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11211 kIemNativeGstRegUse_ForUpdate);
11212
11213#ifdef RT_ARCH_AMD64
11214 /* mov reg16, reg16 or [mem16] */
11215 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
11216 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11217 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
11218 {
11219 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
11220 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
11221 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
11222 pbCodeBuf[off++] = 0x8b;
11223 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
11224 }
11225 else
11226 {
11227 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
11228 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
11229 if (idxGstTmpReg >= 8)
11230 pbCodeBuf[off++] = X86_OP_REX_R;
11231 pbCodeBuf[off++] = 0x8b;
11232 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11233 }
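    /* Illustration (added, not part of the original source): if the 16-bit value
       variable currently lives in a host register this is a plain "mov r16, r16";
       if it only exists in its stack slot it is loaded directly, e.g. a
       "mov ax, word [rbp+disp]" with the displacement from iemNativeStackCalcBpDisp(). */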
11234
11235#elif defined(RT_ARCH_ARM64)
11236 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
11237 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11238 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11239 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
11240 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11241
11242#else
11243# error "Port me!"
11244#endif
11245
11246 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11247
11248 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11249 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11250 return off;
11251}
11252
11253
11254#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
11255 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
11256
11257/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
11258DECL_INLINE_THROW(uint32_t)
11259iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
11260{
11261 Assert(iGReg < 16);
11262 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11263 kIemNativeGstRegUse_ForFullWrite);
11264 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11265 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11266 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11267 return off;
11268}
11269
11270
11271#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
11272 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
11273
11274/** Emits code for IEM_MC_STORE_GREG_U32. */
11275DECL_INLINE_THROW(uint32_t)
11276iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11277{
11278 Assert(iGReg < 16);
11279 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11280
11281 /*
11282 * If it's a constant value (unlikely) we treat this as a
11283 * IEM_MC_STORE_GREG_U32_CONST statement.
11284 */
11285 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11286 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11287 { /* likely */ }
11288 else
11289 {
11290 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11291 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11292 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
11293 }
11294
11295 /*
11296     * For the rest we allocate a guest register for the variable and write
11297 * it to the CPUMCTX structure.
11298 */
11299 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11300 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11301#ifdef VBOX_STRICT
11302 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
11303#endif
11304 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11305 return off;
11306}
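/* Note (added for clarity, not in the original source): a 32-bit GPR write zero
   extends to 64 bits in the guest, which is why the variable's host register is made
   the shadow of the guest GPR and stored with a full 64-bit move; the strict-build
   check above merely verifies that the upper 32 bits of the value really are clear,
   as e.g. a recompiled "mov eax, ebx" would require. */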
11307
11308
11309#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
11310 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
11311
11312/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
11313DECL_INLINE_THROW(uint32_t)
11314iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
11315{
11316 Assert(iGReg < 16);
11317 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11318 kIemNativeGstRegUse_ForFullWrite);
11319 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11320 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11321 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11322 return off;
11323}
11324
11325
11326#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
11327 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
11328
11329/** Emits code for IEM_MC_STORE_GREG_U64. */
11330DECL_INLINE_THROW(uint32_t)
11331iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11332{
11333 Assert(iGReg < 16);
11334 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11335
11336 /*
11337 * If it's a constant value (unlikely) we treat this as a
11338 * IEM_MC_STORE_GREG_U64_CONST statement.
11339 */
11340 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11341 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11342 { /* likely */ }
11343 else
11344 {
11345 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11346 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11347 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
11348 }
11349
11350 /*
11351     * For the rest we allocate a guest register for the variable and write
11352 * it to the CPUMCTX structure.
11353 */
11354 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11355 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11356 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11357 return off;
11358}
11359
11360
11361#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
11362 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
11363
11364/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
11365DECL_INLINE_THROW(uint32_t)
11366iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
11367{
11368 Assert(iGReg < 16);
11369 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11370 kIemNativeGstRegUse_ForUpdate);
11371 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
11372 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11373 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11374 return off;
11375}
11376
11377
11378/*********************************************************************************************************************************
11379* General purpose register manipulation (add, sub). *
11380*********************************************************************************************************************************/
11381
11382#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11383 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11384
11385/** Emits code for IEM_MC_ADD_GREG_U16. */
11386DECL_INLINE_THROW(uint32_t)
11387iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
11388{
11389 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11390 kIemNativeGstRegUse_ForUpdate);
11391
11392#ifdef RT_ARCH_AMD64
11393 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11394 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11395 if (idxGstTmpReg >= 8)
11396 pbCodeBuf[off++] = X86_OP_REX_B;
11397 if (uAddend == 1)
11398 {
11399 pbCodeBuf[off++] = 0xff; /* inc */
11400 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11401 }
11402 else
11403 {
11404 pbCodeBuf[off++] = 0x81;
11405 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11406 pbCodeBuf[off++] = uAddend;
11407 pbCodeBuf[off++] = 0;
11408 }
11409
11410#else
11411 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11412 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11413
11414    /* add tmp, gstgrp, uAddend */
11415 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
11416
11417 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
11418 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11419
11420 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11421#endif
11422
11423 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11424
11425 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11426
11427 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11428 return off;
11429}
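/* Usage sketch (illustrative, not from the original source): something along the
   lines of IEM_MC_ADD_GREG_U16(X86_GREG_xSI, 2) lands here and on AMD64 becomes
   "inc si" for an addend of 1 or "add si, imm16" (66 81 /0) otherwise, leaving the
   upper GPR bits untouched; on ARM64 the addition is done in a temporary register
   and the low 16 bits are merged back with a BFI instruction. */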
11430
11431
11432#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
11433 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11434
11435#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
11436 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11437
11438/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
11439DECL_INLINE_THROW(uint32_t)
11440iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
11441{
11442 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11443 kIemNativeGstRegUse_ForUpdate);
11444
11445#ifdef RT_ARCH_AMD64
11446 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11447 if (f64Bit)
11448 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11449 else if (idxGstTmpReg >= 8)
11450 pbCodeBuf[off++] = X86_OP_REX_B;
11451 if (uAddend == 1)
11452 {
11453 pbCodeBuf[off++] = 0xff; /* inc */
11454 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11455 }
11456 else if (uAddend < 128)
11457 {
11458 pbCodeBuf[off++] = 0x83; /* add */
11459 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11460 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11461 }
11462 else
11463 {
11464 pbCodeBuf[off++] = 0x81; /* add */
11465 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11466 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11467 pbCodeBuf[off++] = 0;
11468 pbCodeBuf[off++] = 0;
11469 pbCodeBuf[off++] = 0;
11470 }
11471
11472#else
11473    /* add gstgrp, gstgrp, uAddend */
11474 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11475 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
11476
11477#endif
11478
11479 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11480
11481 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11482
11483 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11484 return off;
11485}
11486
11487
11488
11489#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11490 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11491
11492/** Emits code for IEM_MC_SUB_GREG_U16. */
11493DECL_INLINE_THROW(uint32_t)
11494iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
11495{
11496 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11497 kIemNativeGstRegUse_ForUpdate);
11498
11499#ifdef RT_ARCH_AMD64
11500 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11501 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11502 if (idxGstTmpReg >= 8)
11503 pbCodeBuf[off++] = X86_OP_REX_B;
11504 if (uSubtrahend == 1)
11505 {
11506 pbCodeBuf[off++] = 0xff; /* dec */
11507 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11508 }
11509 else
11510 {
11511 pbCodeBuf[off++] = 0x81;
11512 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11513 pbCodeBuf[off++] = uSubtrahend;
11514 pbCodeBuf[off++] = 0;
11515 }
11516
11517#else
11518 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11519 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11520
11521 /* sub tmp, gstgrp, uSubtrahend */
11522 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
11523
11524 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
11525 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11526
11527 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11528#endif
11529
11530 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11531
11532 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11533
11534 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11535 return off;
11536}
11537
11538
11539#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
11540 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11541
11542#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
11543 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11544
11545/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
11546DECL_INLINE_THROW(uint32_t)
11547iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
11548{
11549 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11550 kIemNativeGstRegUse_ForUpdate);
11551
11552#ifdef RT_ARCH_AMD64
11553 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11554 if (f64Bit)
11555 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11556 else if (idxGstTmpReg >= 8)
11557 pbCodeBuf[off++] = X86_OP_REX_B;
11558 if (uSubtrahend == 1)
11559 {
11560 pbCodeBuf[off++] = 0xff; /* dec */
11561 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11562 }
11563 else if (uSubtrahend < 128)
11564 {
11565 pbCodeBuf[off++] = 0x83; /* sub */
11566 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11567 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11568 }
11569 else
11570 {
11571 pbCodeBuf[off++] = 0x81; /* sub */
11572 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11573 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11574 pbCodeBuf[off++] = 0;
11575 pbCodeBuf[off++] = 0;
11576 pbCodeBuf[off++] = 0;
11577 }
11578
11579#else
11580    /* sub gstgrp, gstgrp, uSubtrahend */
11581 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11582 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
11583
11584#endif
11585
11586 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11587
11588 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11589
11590 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11591 return off;
11592}
11593
11594
11595/*********************************************************************************************************************************
11596* Local variable manipulation (add, sub, and, or). *
11597*********************************************************************************************************************************/
11598
11599#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
11600 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11601
11602#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
11603 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11604
11605#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
11606 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11607
11608#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
11609 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11610
11611/** Emits code for AND'ing a local and a constant value. */
11612DECL_INLINE_THROW(uint32_t)
11613iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11614{
11615#ifdef VBOX_STRICT
11616 switch (cbMask)
11617 {
11618 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11619 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11620 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11621 case sizeof(uint64_t): break;
11622 default: AssertFailedBreak();
11623 }
11624#endif
11625
11626 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11627 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11628
11629 if (cbMask <= sizeof(uint32_t))
11630 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
11631 else
11632 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
11633
11634 iemNativeVarRegisterRelease(pReNative, idxVar);
11635 return off;
11636}
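/* Note (added, not in the original): for masks of 32 bits or less the 32-bit AND
   form is used, which on AMD64 implicitly clears bits 63:32 of the host register;
   that is harmless here since the local variable is at most cbMask bytes wide.
   E.g. IEM_MC_AND_LOCAL_U16(u16Value, 0x1f) would typically end up as an
   "and r32, 0x1f" style instruction. */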
11637
11638
11639#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
11640 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11641
11642#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
11643 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11644
11645#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
11646 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11647
11648#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
11649 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11650
11651/** Emits code for OR'ing a local and a constant value. */
11652DECL_INLINE_THROW(uint32_t)
11653iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11654{
11655#ifdef VBOX_STRICT
11656 switch (cbMask)
11657 {
11658 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11659 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11660 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11661 case sizeof(uint64_t): break;
11662 default: AssertFailedBreak();
11663 }
11664#endif
11665
11666 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11667 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11668
11669 if (cbMask <= sizeof(uint32_t))
11670 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
11671 else
11672 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
11673
11674 iemNativeVarRegisterRelease(pReNative, idxVar);
11675 return off;
11676}
11677
11678
11679#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
11680 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
11681
11682#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
11683 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
11684
11685#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
11686 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
11687
11688/** Emits code for reversing the byte order in a local value. */
11689DECL_INLINE_THROW(uint32_t)
11690iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
11691{
11692 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11693 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
11694
11695 switch (cbLocal)
11696 {
11697 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
11698 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
11699 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
11700 default: AssertFailedBreak();
11701 }
11702
11703 iemNativeVarRegisterRelease(pReNative, idxVar);
11704 return off;
11705}
11706
11707
11708
11709/*********************************************************************************************************************************
11710* EFLAGS *
11711*********************************************************************************************************************************/
11712
11713#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
11714# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
11715#else
11716# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
11717 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
11718
11719DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
11720{
11721 if (fEflOutput)
11722 {
11723 PVMCPUCC const pVCpu = pReNative->pVCpu;
11724# ifndef IEMLIVENESS_EXTENDED_LAYOUT
11725 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
11726 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
11727 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
11728# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11729 if (fEflOutput & (a_fEfl)) \
11730 { \
11731 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
11732 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11733 else \
11734 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11735 } else do { } while (0)
11736# else
11737 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
11738 IEMLIVENESSBIT const LivenessClobbered =
11739 {
11740 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11741 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11742 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11743 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11744 };
11745 IEMLIVENESSBIT const LivenessDelayable =
11746 {
11747 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11748 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11749 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11750 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11751 };
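        /* In other words (explanatory note, not in the original source): a flag is
           counted as skippable when the next write clobbers it without any intervening
           read, as delayable when only a potential exception or call still cares about
           it, and as required otherwise. */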
11752# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11753 if (fEflOutput & (a_fEfl)) \
11754 { \
11755 if (LivenessClobbered.a_fLivenessMember) \
11756 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11757 else if (LivenessDelayable.a_fLivenessMember) \
11758 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
11759 else \
11760 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11761 } else do { } while (0)
11762# endif
11763 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
11764 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
11765 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
11766 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
11767 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
11768 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
11769 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
11770# undef CHECK_FLAG_AND_UPDATE_STATS
11771 }
11772 RT_NOREF(fEflInput);
11773}
11774#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
11775
11776#undef IEM_MC_FETCH_EFLAGS /* should not be used */
11777#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11778 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
11779
11780/** Handles IEM_MC_FETCH_EFLAGS_EX. */
11781DECL_INLINE_THROW(uint32_t)
11782iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
11783 uint32_t fEflInput, uint32_t fEflOutput)
11784{
11785 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
11786 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11787 RT_NOREF(fEflInput, fEflOutput);
11788
11789#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
11790# ifdef VBOX_STRICT
11791 if ( pReNative->idxCurCall != 0
11792 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
11793 {
11794 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
11795 uint32_t const fBoth = fEflInput | fEflOutput;
11796# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
11797 AssertMsg( !(fBoth & (a_fElfConst)) \
11798 || (!(fEflInput & (a_fElfConst)) \
11799 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11800 : !(fEflOutput & (a_fElfConst)) \
11801 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11802 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
11803 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
11804 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
11805 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
11806 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
11807 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
11808 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
11809 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
11810 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
11811# undef ASSERT_ONE_EFL
11812 }
11813# endif
11814#endif
11815
11816    /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
11817 * the existing shadow copy. */
11818 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
11819 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11820 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11821 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11822 return off;
11823}
11824
11825
11826
11827/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
11828 * start using it with custom native code emission (inlining assembly
11829 * instruction helpers). */
11830#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
11831#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11832 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11833 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
11834
11835/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
11836DECL_INLINE_THROW(uint32_t)
11837iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
11838{
11839 RT_NOREF(fEflOutput);
11840 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
11841 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11842
11843#ifdef VBOX_STRICT
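    /* Sanity checks (descriptive comment added, not in the original source): the
       committed value must have the always-one EFLAGS bit (bit 1, RA1) set and all
       must-be-zero bits clear; if either check fails a breakpoint instruction is
       emitted (0x2001 / 0x2002) so a strict build traps the corrupt value. */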
11844 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
11845 uint32_t offFixup = off;
11846 off = iemNativeEmitJnzToFixed(pReNative, off, off);
11847 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
11848 iemNativeFixupFixedJump(pReNative, offFixup, off);
11849
11850 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
11851 offFixup = off;
11852 off = iemNativeEmitJzToFixed(pReNative, off, off);
11853 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
11854 iemNativeFixupFixedJump(pReNative, offFixup, off);
11855
11856    /** @todo validate that only bits in the fEflOutput mask changed. */
11857#endif
11858
11859 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11860 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
11861 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11862 return off;
11863}
11864
11865
11866
11867/*********************************************************************************************************************************
11868* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
11869*********************************************************************************************************************************/
11870
11871#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
11872 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
11873
11874#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
11875 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
11876
11877#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
11878 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
11879
11880
11881/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
11882 * IEM_MC_FETCH_SREG_ZX_U64. */
11883DECL_INLINE_THROW(uint32_t)
11884iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
11885{
11886 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
11887 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
11888 Assert(iSReg < X86_SREG_COUNT);
11889
11890 /*
11891     * For now, we will not create a shadow copy of a selector.  The rationale
11892     * is that since we do not recompile the popping and loading of segment
11893     * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for pushing
11894     * and moving to registers, there is only a small chance that the shadow
11895     * copy will be accessed again before the register is reloaded.  One
11896     * scenario would be nested calls in 16-bit code, but I doubt it's worth
11897 * the extra register pressure atm.
11898 *
11899 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
11900 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
11901     * store scenario covered at present (r160730).
11902 */
11903 iemNativeVarSetKindToStack(pReNative, idxDstVar);
11904 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
11905 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
11906 iemNativeVarRegisterRelease(pReNative, idxDstVar);
11907 return off;
11908}
11909
11910
11911
11912/*********************************************************************************************************************************
11913* Register references. *
11914*********************************************************************************************************************************/
11915
11916#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
11917 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
11918
11919#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
11920 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
11921
11922/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
11923DECL_INLINE_THROW(uint32_t)
11924iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
11925{
11926 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
11927 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11928 Assert(iGRegEx < 20);
11929
11930 if (iGRegEx < 16)
11931 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11932 else
11933 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
11934
11935 /* If we've delayed writing back the register value, flush it now. */
11936 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11937
11938 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11939 if (!fConst)
11940 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
11941
11942 return off;
11943}
11944
11945#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
11946 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
11947
11948#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
11949 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
11950
11951#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
11952 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
11953
11954#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
11955 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
11956
11957#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
11958 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
11959
11960#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
11961 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
11962
11963#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
11964 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
11965
11966#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
11967 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
11968
11969#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
11970 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
11971
11972#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
11973 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
11974
11975/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
11976DECL_INLINE_THROW(uint32_t)
11977iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
11978{
11979 Assert(iGReg < 16);
11980 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
11981 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11982
11983 /* If we've delayed writing back the register value, flush it now. */
11984 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
11985
11986 /* If it's not a const reference we need to flush the shadow copy of the register now. */
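    /* (Added explanation: the caller may modify the guest register through the
        returned reference, so any host register still shadowing it would go stale.) */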
11987 if (!fConst)
11988 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
11989
11990 return off;
11991}
11992
11993
11994#undef IEM_MC_REF_EFLAGS /* should not be used. */
11995#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
11996 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11997 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
11998
11999/** Handles IEM_MC_REF_EFLAGS. */
12000DECL_INLINE_THROW(uint32_t)
12001iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
12002{
12003 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
12004 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12005
12006 /* If we've delayed writing back the register value, flush it now. */
12007 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
12008
12009 /* If there is a shadow copy of guest EFLAGS, flush it now. */
12010 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
12011
12012 return off;
12013}
12014
12015
12016/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
12017 * different code from the threaded recompiler, maybe it would be helpful. For now
12018 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
12019#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
12020
12021
12022#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
12023 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
12024
12025#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
12026 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
12027
12028#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
12029 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
12030
12031/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
12032DECL_INLINE_THROW(uint32_t)
12033iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
12034{
12035 Assert(iXReg < 16);
12036 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
12037 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12038
12039 /* If we've delayed writing back the register value, flush it now. */
12040 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
12041
12042#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
12043 /* If it's not a const reference we need to flush the shadow copy of the register now. */
12044 if (!fConst)
12045 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
12046#else
12047 RT_NOREF(fConst);
12048#endif
12049
12050 return off;
12051}
12052
12053
12054#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
12055 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
12056
12057/** Handles IEM_MC_REF_MXCSR. */
12058DECL_INLINE_THROW(uint32_t)
12059iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
12060{
12061 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
12062 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12063
12064 /* If we've delayed writing back the register value, flush it now. */
12065 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
12066
12067 /* If there is a shadow copy of guest MXCSR, flush it now. */
12068 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
12069
12070 return off;
12071}
12072
12073
12074
12075/*********************************************************************************************************************************
12076* Effective Address Calculation *
12077*********************************************************************************************************************************/
12078#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
12079 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
12080
12081/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
12082 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
12083DECL_INLINE_THROW(uint32_t)
12084iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12085 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
12086{
12087 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12088
12089 /*
12090 * Handle the disp16 form with no registers first.
12091 *
12092 * Convert to an immediate value, as that'll delay the register allocation
12093 * and assignment till the memory access / call / whatever and we can use
12094 * a more appropriate register (or none at all).
12095 */
12096 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
12097 {
12098 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
12099 return off;
12100 }
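    /* Illustrative example (assumed encoding, not generated here): for a 16-bit
       "mov ax, [1234h]" the threaded code hands us bRm=0x06 (mod=0, rm=6) and
       u16Disp=0x1234, so the result variable simply becomes the constant 0x1234
       and no guest register needs to be allocated at all. */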
12101
12102    /* Determine the displacement. */
12103 uint16_t u16EffAddr;
12104 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12105 {
12106 case 0: u16EffAddr = 0; break;
12107 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
12108 case 2: u16EffAddr = u16Disp; break;
12109 default: AssertFailedStmt(u16EffAddr = 0);
12110 }
12111
12112 /* Determine the registers involved. */
12113 uint8_t idxGstRegBase;
12114 uint8_t idxGstRegIndex;
12115 switch (bRm & X86_MODRM_RM_MASK)
12116 {
12117 case 0:
12118 idxGstRegBase = X86_GREG_xBX;
12119 idxGstRegIndex = X86_GREG_xSI;
12120 break;
12121 case 1:
12122 idxGstRegBase = X86_GREG_xBX;
12123 idxGstRegIndex = X86_GREG_xDI;
12124 break;
12125 case 2:
12126 idxGstRegBase = X86_GREG_xBP;
12127 idxGstRegIndex = X86_GREG_xSI;
12128 break;
12129 case 3:
12130 idxGstRegBase = X86_GREG_xBP;
12131 idxGstRegIndex = X86_GREG_xDI;
12132 break;
12133 case 4:
12134 idxGstRegBase = X86_GREG_xSI;
12135 idxGstRegIndex = UINT8_MAX;
12136 break;
12137 case 5:
12138 idxGstRegBase = X86_GREG_xDI;
12139 idxGstRegIndex = UINT8_MAX;
12140 break;
12141 case 6:
12142 idxGstRegBase = X86_GREG_xBP;
12143 idxGstRegIndex = UINT8_MAX;
12144 break;
12145#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
12146 default:
12147#endif
12148 case 7:
12149 idxGstRegBase = X86_GREG_xBX;
12150 idxGstRegIndex = UINT8_MAX;
12151 break;
12152 }
12153
12154 /*
12155 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
12156 */
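    /* Illustrative example (assumed encoding, not generated here): bRm=0x42 decodes
       to mod=1, rm=2, i.e. [bp+si+disp8]; with u16Disp=0x10 we get u16EffAddr=0x10,
       idxGstRegBase=xBP and idxGstRegIndex=xSI, so the code below computes
       (uint16_t)(0x10 + BP + SI). */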
12157 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12158 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12159 kIemNativeGstRegUse_ReadOnly);
12160 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
12161 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12162 kIemNativeGstRegUse_ReadOnly)
12163 : UINT8_MAX;
12164#ifdef RT_ARCH_AMD64
12165 if (idxRegIndex == UINT8_MAX)
12166 {
12167 if (u16EffAddr == 0)
12168 {
12169                /* movzx ret, base */
12170 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
12171 }
12172 else
12173 {
12174 /* lea ret32, [base64 + disp32] */
12175 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12176 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12177 if (idxRegRet >= 8 || idxRegBase >= 8)
12178 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12179 pbCodeBuf[off++] = 0x8d;
12180 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12181 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
12182 else
12183 {
12184 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
12185 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12186 }
12187 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12188 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12189 pbCodeBuf[off++] = 0;
12190 pbCodeBuf[off++] = 0;
12191 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12192
12193 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12194 }
12195 }
12196 else
12197 {
12198 /* lea ret32, [index64 + base64 (+ disp32)] */
12199 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12200 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12201 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12202 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12203 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12204 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12205 pbCodeBuf[off++] = 0x8d;
12206 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
12207 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12208 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
12209 if (bMod == X86_MOD_MEM4)
12210 {
12211 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12212 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12213 pbCodeBuf[off++] = 0;
12214 pbCodeBuf[off++] = 0;
12215 }
12216 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12217 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12218 }
12219
12220#elif defined(RT_ARCH_ARM64)
12221 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
12222 if (u16EffAddr == 0)
12223 {
12224 if (idxRegIndex == UINT8_MAX)
12225 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
12226 else
12227 {
12228 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
12229 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12230 }
12231 }
12232 else
12233 {
12234 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
12235 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
12236 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
12237 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12238 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
12239 else
12240 {
12241 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
12242 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12243 }
12244 if (idxRegIndex != UINT8_MAX)
12245 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
12246 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12247 }
12248
12249#else
12250# error "port me"
12251#endif
12252
12253 if (idxRegIndex != UINT8_MAX)
12254 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12255 iemNativeRegFreeTmp(pReNative, idxRegBase);
12256 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12257 return off;
12258}
12259
12260
12261#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
12262 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
12263
12264/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
12265 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
12266DECL_INLINE_THROW(uint32_t)
12267iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12268 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
12269{
12270 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12271
12272 /*
12273 * Handle the disp32 form with no registers first.
12274 *
12275 * Convert to an immediate value, as that'll delay the register allocation
12276 * and assignment till the memory access / call / whatever and we can use
12277 * a more appropriate register (or none at all).
12278 */
12279 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12280 {
12281 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
12282 return off;
12283 }
12284
12285    /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
12286 uint32_t u32EffAddr = 0;
12287 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12288 {
12289 case 0: break;
12290 case 1: u32EffAddr = (int8_t)u32Disp; break;
12291 case 2: u32EffAddr = u32Disp; break;
12292 default: AssertFailed();
12293 }
12294
12295 /* Get the register (or SIB) value. */
12296 uint8_t idxGstRegBase = UINT8_MAX;
12297 uint8_t idxGstRegIndex = UINT8_MAX;
12298 uint8_t cShiftIndex = 0;
12299 switch (bRm & X86_MODRM_RM_MASK)
12300 {
12301 case 0: idxGstRegBase = X86_GREG_xAX; break;
12302 case 1: idxGstRegBase = X86_GREG_xCX; break;
12303 case 2: idxGstRegBase = X86_GREG_xDX; break;
12304 case 3: idxGstRegBase = X86_GREG_xBX; break;
12305 case 4: /* SIB */
12306 {
12307            /* index w/ scaling. */
12308 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12309 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12310 {
12311 case 0: idxGstRegIndex = X86_GREG_xAX; break;
12312 case 1: idxGstRegIndex = X86_GREG_xCX; break;
12313 case 2: idxGstRegIndex = X86_GREG_xDX; break;
12314 case 3: idxGstRegIndex = X86_GREG_xBX; break;
12315 case 4: cShiftIndex = 0; /*no index*/ break;
12316 case 5: idxGstRegIndex = X86_GREG_xBP; break;
12317 case 6: idxGstRegIndex = X86_GREG_xSI; break;
12318 case 7: idxGstRegIndex = X86_GREG_xDI; break;
12319 }
12320
12321 /* base */
12322 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
12323 {
12324 case 0: idxGstRegBase = X86_GREG_xAX; break;
12325 case 1: idxGstRegBase = X86_GREG_xCX; break;
12326 case 2: idxGstRegBase = X86_GREG_xDX; break;
12327 case 3: idxGstRegBase = X86_GREG_xBX; break;
12328 case 4:
12329 idxGstRegBase = X86_GREG_xSP;
12330 u32EffAddr += uSibAndRspOffset >> 8;
12331 break;
12332 case 5:
12333 if ((bRm & X86_MODRM_MOD_MASK) != 0)
12334 idxGstRegBase = X86_GREG_xBP;
12335 else
12336 {
12337 Assert(u32EffAddr == 0);
12338 u32EffAddr = u32Disp;
12339 }
12340 break;
12341 case 6: idxGstRegBase = X86_GREG_xSI; break;
12342 case 7: idxGstRegBase = X86_GREG_xDI; break;
12343 }
12344 break;
12345 }
12346 case 5: idxGstRegBase = X86_GREG_xBP; break;
12347 case 6: idxGstRegBase = X86_GREG_xSI; break;
12348 case 7: idxGstRegBase = X86_GREG_xDI; break;
12349 }
12350
12351 /*
12352 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12353 * the start of the function.
12354 */
12355 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12356 {
12357 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
12358 return off;
12359 }
12360
12361 /*
12362 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12363 */
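    /* Illustrative example (assumed encoding, not generated here): bRm=0x44 with
       SIB=0x88 and u32Disp=0x10 decodes to mod=1, rm=4 (SIB), scale=2, index=1 (xCX)
       and base=0 (xAX), so the code below computes (uint32_t)(0x10 + EAX + (ECX << 2)). */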
12364 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12365 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12366 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12367 kIemNativeGstRegUse_ReadOnly);
12368 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12369 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12370 kIemNativeGstRegUse_ReadOnly);
12371
12372 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12373 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12374 {
12375 idxRegBase = idxRegIndex;
12376 idxRegIndex = UINT8_MAX;
12377 }
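    /* E.g. a pure [index*1 + disp32] form (no base register, cShiftIndex == 0) is
       treated as a base-only form from here on, so the simpler emitter paths below
       are reused instead of duplicating them for the index register. */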
12378
12379#ifdef RT_ARCH_AMD64
12380 if (idxRegIndex == UINT8_MAX)
12381 {
12382 if (u32EffAddr == 0)
12383 {
12384 /* mov ret, base */
12385 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12386 }
12387 else
12388 {
12389 /* lea ret32, [base64 + disp32] */
12390 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12391 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12392 if (idxRegRet >= 8 || idxRegBase >= 8)
12393 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12394 pbCodeBuf[off++] = 0x8d;
12395 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12396 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12397 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12398 else
12399 {
12400 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12401 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12402 }
12403 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12404 if (bMod == X86_MOD_MEM4)
12405 {
12406 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12407 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12408 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12409 }
12410 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12411 }
12412 }
12413 else
12414 {
12415 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12416 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12417 if (idxRegBase == UINT8_MAX)
12418 {
12419 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
12420 if (idxRegRet >= 8 || idxRegIndex >= 8)
12421 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12422 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12423 pbCodeBuf[off++] = 0x8d;
12424 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12425 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12426 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12427 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12428 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12429 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12430 }
12431 else
12432 {
12433 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12434 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12435 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12436 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12437 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12438 pbCodeBuf[off++] = 0x8d;
12439 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12440 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12441 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12442 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12443 if (bMod != X86_MOD_MEM0)
12444 {
12445 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12446 if (bMod == X86_MOD_MEM4)
12447 {
12448 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12449 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12450 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12451 }
12452 }
12453 }
12454 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12455 }
12456
12457#elif defined(RT_ARCH_ARM64)
12458 if (u32EffAddr == 0)
12459 {
12460 if (idxRegIndex == UINT8_MAX)
12461 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12462 else if (idxRegBase == UINT8_MAX)
12463 {
12464 if (cShiftIndex == 0)
12465 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
12466 else
12467 {
12468 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12469 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
12470 }
12471 }
12472 else
12473 {
12474 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12475 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12476 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12477 }
12478 }
12479 else
12480 {
12481 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
12482 {
12483 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12484 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
12485 }
12486 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
12487 {
12488 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12489 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12490 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
12491 }
12492 else
12493 {
12494 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
12495 if (idxRegBase != UINT8_MAX)
12496 {
12497 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12498 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12499 }
12500 }
12501 if (idxRegIndex != UINT8_MAX)
12502 {
12503 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12504 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12505 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12506 }
12507 }
12508
12509#else
12510# error "port me"
12511#endif
12512
12513 if (idxRegIndex != UINT8_MAX)
12514 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12515 if (idxRegBase != UINT8_MAX)
12516 iemNativeRegFreeTmp(pReNative, idxRegBase);
12517 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12518 return off;
12519}
12520
12521
12522#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12523 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12524 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12525
12526#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12527 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12528 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12529
12530#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12531 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12532 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
12533
12534/**
12535 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
12536 *
12537 * @returns New off.
12538 * @param   pReNative           The native recompile state.
12539 * @param   off                 The current code buffer offset.
12540 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
12541 * bit 4 to REX.X. The two bits are part of the
12542 * REG sub-field, which isn't needed in this
12543 * function.
12544 * @param uSibAndRspOffset Two parts:
12545 * - The first 8 bits make up the SIB byte.
12546 * - The next 8 bits are the fixed RSP/ESP offset
12547 * in case of a pop [xSP].
12548 * @param u32Disp The displacement byte/word/dword, if any.
12549 * @param cbInstr The size of the fully decoded instruction. Used
12550 * for RIP relative addressing.
12551 * @param idxVarRet The result variable number.
12552 * @param f64Bit Whether to use a 64-bit or 32-bit address size
12553 * when calculating the address.
12554 *
12555 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
12556 */
12557DECL_INLINE_THROW(uint32_t)
12558iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
12559 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
12560{
12561 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12562
12563 /*
12564 * Special case the rip + disp32 form first.
12565 */
12566 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12567 {
12568#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12569        /* Need to take the current PC offset into account for the displacement; no need to flush here
12570         * as the PC is only accessed read-only and no branching or helper calls are involved. */
12571 u32Disp += pReNative->Core.offPc;
12572#endif
12573
12574 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12575 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
12576 kIemNativeGstRegUse_ReadOnly);
12577#ifdef RT_ARCH_AMD64
12578 if (f64Bit)
12579 {
12580 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
12581 if ((int32_t)offFinalDisp == offFinalDisp)
12582 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
12583 else
12584 {
12585 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
12586 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
12587 }
12588 }
12589 else
12590 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
12591
12592#elif defined(RT_ARCH_ARM64)
12593 if (f64Bit)
12594 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12595 (int64_t)(int32_t)u32Disp + cbInstr);
12596 else
12597 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12598 (int32_t)u32Disp + cbInstr);
12599
12600#else
12601# error "Port me!"
12602#endif
12603 iemNativeRegFreeTmp(pReNative, idxRegPc);
12604 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12605 return off;
12606 }
12607
12608    /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
12609 int64_t i64EffAddr = 0;
12610 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12611 {
12612 case 0: break;
12613 case 1: i64EffAddr = (int8_t)u32Disp; break;
12614 case 2: i64EffAddr = (int32_t)u32Disp; break;
12615 default: AssertFailed();
12616 }
12617
12618 /* Get the register (or SIB) value. */
12619 uint8_t idxGstRegBase = UINT8_MAX;
12620 uint8_t idxGstRegIndex = UINT8_MAX;
12621 uint8_t cShiftIndex = 0;
12622 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
12623 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
12624 else /* SIB: */
12625 {
12626        /* index w/ scaling. */
12627 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12628 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12629 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
12630 if (idxGstRegIndex == 4)
12631 {
12632 /* no index */
12633 cShiftIndex = 0;
12634 idxGstRegIndex = UINT8_MAX;
12635 }
12636
12637 /* base */
12638 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
12639 if (idxGstRegBase == 4)
12640 {
12641 /* pop [rsp] hack */
12642 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
12643 }
12644 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
12645 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
12646 {
12647 /* mod=0 and base=5 -> disp32, no base reg. */
12648 Assert(i64EffAddr == 0);
12649 i64EffAddr = (int32_t)u32Disp;
12650 idxGstRegBase = UINT8_MAX;
12651 }
12652 }
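    /* Illustrative example (assumed encoding, not generated here): with REX.X set the
       caller passes bRmEx=0x14 (mod=0, rm=4/SIB, bit 4 = REX.X) and SIB=0x58 in the low
       byte of uSibAndRspOffset (scale=1, index=3, base=0), which the code above decodes
       to idxGstRegBase=xAX, idxGstRegIndex=11 (r11) and cShiftIndex=1, i.e.
       RAX + (R11 << 1) with no displacement since mod=0. */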
12653
12654 /*
12655 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12656 * the start of the function.
12657 */
12658 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12659 {
12660 if (f64Bit)
12661 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
12662 else
12663 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
12664 return off;
12665 }
12666
12667 /*
12668 * Now emit code that calculates:
12669 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12670 * or if !f64Bit:
12671 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12672 */
12673 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12674 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12675 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12676 kIemNativeGstRegUse_ReadOnly);
12677 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12678 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12679 kIemNativeGstRegUse_ReadOnly);
12680
12681 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12682 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12683 {
12684 idxRegBase = idxRegIndex;
12685 idxRegIndex = UINT8_MAX;
12686 }
12687
12688#ifdef RT_ARCH_AMD64
12689 uint8_t bFinalAdj;
12690 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
12691 bFinalAdj = 0; /* likely */
12692 else
12693 {
12694 /* pop [rsp] with a problematic disp32 value. Split out the
12695 RSP offset and add it separately afterwards (bFinalAdj). */
12696 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
12697 Assert(idxGstRegBase == X86_GREG_xSP);
12698 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
12699 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
12700 Assert(bFinalAdj != 0);
12701 i64EffAddr -= bFinalAdj;
12702 Assert((int32_t)i64EffAddr == i64EffAddr);
12703 }
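    /* Illustrative case (assuming a 64-bit pop, i.e. an 8 byte RSP offset): for
       "pop qword [rsp+7FFFFFFFh]" we get i64EffAddr = 0x7FFFFFFF + 8 = 0x80000007,
       which no longer fits into a disp32, so the 8 byte adjustment is split off into
       bFinalAdj and added separately after the lea below. */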
12704 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
12705//pReNative->pInstrBuf[off++] = 0xcc;
12706
12707 if (idxRegIndex == UINT8_MAX)
12708 {
12709 if (u32EffAddr == 0)
12710 {
12711 /* mov ret, base */
12712 if (f64Bit)
12713 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
12714 else
12715 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12716 }
12717 else
12718 {
12719 /* lea ret, [base + disp32] */
12720 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12721 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12722 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
12723 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12724 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12725 | (f64Bit ? X86_OP_REX_W : 0);
12726 pbCodeBuf[off++] = 0x8d;
12727 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12728 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12729 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12730 else
12731 {
12732 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12733 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12734 }
12735 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12736 if (bMod == X86_MOD_MEM4)
12737 {
12738 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12739 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12740 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12741 }
12742 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12743 }
12744 }
12745 else
12746 {
12747 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12748 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12749 if (idxRegBase == UINT8_MAX)
12750 {
12751 /* lea ret, [(index64 << cShiftIndex) + disp32] */
12752 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
12753 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12754 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12755 | (f64Bit ? X86_OP_REX_W : 0);
12756 pbCodeBuf[off++] = 0x8d;
12757 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12758 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12759 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12760 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12761 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12762 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12763 }
12764 else
12765 {
12766 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12767 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12768 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12769 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12770 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12771 | (f64Bit ? X86_OP_REX_W : 0);
12772 pbCodeBuf[off++] = 0x8d;
12773 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12774 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12775 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12776 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12777 if (bMod != X86_MOD_MEM0)
12778 {
12779 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12780 if (bMod == X86_MOD_MEM4)
12781 {
12782 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12783 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12784 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12785 }
12786 }
12787 }
12788 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12789 }
12790
12791 if (!bFinalAdj)
12792 { /* likely */ }
12793 else
12794 {
12795 Assert(f64Bit);
12796 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
12797 }
12798
12799#elif defined(RT_ARCH_ARM64)
12800 if (i64EffAddr == 0)
12801 {
12802 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12803 if (idxRegIndex == UINT8_MAX)
12804 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
12805 else if (idxRegBase != UINT8_MAX)
12806 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12807 f64Bit, false /*fSetFlags*/, cShiftIndex);
12808 else
12809 {
12810 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
12811 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
12812 }
12813 }
12814 else
12815 {
12816 if (f64Bit)
12817 { /* likely */ }
12818 else
12819 i64EffAddr = (int32_t)i64EffAddr;
12820
12821 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
12822 {
12823 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12824 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
12825 }
12826 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
12827 {
12828 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12829 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
12830 }
12831 else
12832 {
12833 if (f64Bit)
12834 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
12835 else
12836 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
12837 if (idxRegBase != UINT8_MAX)
12838 {
12839 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12840 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
12841 }
12842 }
12843 if (idxRegIndex != UINT8_MAX)
12844 {
12845 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12846 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12847 f64Bit, false /*fSetFlags*/, cShiftIndex);
12848 }
12849 }
12850
12851#else
12852# error "port me"
12853#endif
12854
12855 if (idxRegIndex != UINT8_MAX)
12856 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12857 if (idxRegBase != UINT8_MAX)
12858 iemNativeRegFreeTmp(pReNative, idxRegBase);
12859 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12860 return off;
12861}
12862
12863
12864/*********************************************************************************************************************************
12865* TLB Lookup. *
12866*********************************************************************************************************************************/
12867
12868/**
12869 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
12870 */
12871DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
12872{
12873 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
12874 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
12875 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
12876 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
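    /* As implied by the unpacking above, the caller packs the arguments roughly as:
       uSegAndSizeAndAccess = iSegReg | ((uint32_t)cbMem << 8) | (fAccess << 16). */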
12877
12878 /* Do the lookup manually. */
12879 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
12880 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
12881 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
12882 if (RT_LIKELY(pTlbe->uTag == uTag))
12883 {
12884 /*
12885 * Check TLB page table level access flags.
12886 */
12887 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
12888 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
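        /* Example: with IEMTLBE_F_PT_NO_USER == 4, CPL=3 yields (3 + 1) & 4 = 4, so a
           set IEMTLBE_F_PT_NO_USER bit makes the comparison below fail, while CPL=0..2
           yields 0 and the bit is ignored for supervisor accesses. */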
12889 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
12890 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
12891 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
12892 | IEMTLBE_F_PG_UNASSIGNED
12893 | IEMTLBE_F_PT_NO_ACCESSED
12894 | fNoWriteNoDirty | fNoUser);
12895 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
12896 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
12897 {
12898 /*
12899 * Return the address.
12900 */
12901 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
12902 if ((uintptr_t)pbAddr == uResult)
12903 return;
12904 RT_NOREF(cbMem);
12905 AssertFailed();
12906 }
12907 else
12908 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
12909 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
12910 }
12911 else
12912 AssertFailed();
12913 RT_BREAKPOINT();
12914}
12915
12916/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
12917
12918
12919/*********************************************************************************************************************************
12920* Memory fetches and stores common *
12921*********************************************************************************************************************************/
12922
12923typedef enum IEMNATIVEMITMEMOP
12924{
12925 kIemNativeEmitMemOp_Store = 0,
12926 kIemNativeEmitMemOp_Fetch,
12927 kIemNativeEmitMemOp_Fetch_Zx_U16,
12928 kIemNativeEmitMemOp_Fetch_Zx_U32,
12929 kIemNativeEmitMemOp_Fetch_Zx_U64,
12930 kIemNativeEmitMemOp_Fetch_Sx_U16,
12931 kIemNativeEmitMemOp_Fetch_Sx_U32,
12932 kIemNativeEmitMemOp_Fetch_Sx_U64
12933} IEMNATIVEMITMEMOP;
12934
12935/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
12936 * as well as IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
12937 * (with iSegReg = UINT8_MAX). */
12938DECL_INLINE_THROW(uint32_t)
12939iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
12940 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
12941 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
12942{
12943 /*
12944 * Assert sanity.
12945 */
12946 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12947 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12948 Assert( enmOp != kIemNativeEmitMemOp_Store
12949 || pVarValue->enmKind == kIemNativeVarKind_Immediate
12950 || pVarValue->enmKind == kIemNativeVarKind_Stack);
12951 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12952 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
12953 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
12954 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
12955 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12956 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12957 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
12958 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12959#ifdef VBOX_STRICT
12960 if (iSegReg == UINT8_MAX)
12961 {
12962 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12963 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12964 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12965 switch (cbMem)
12966 {
12967 case 1:
12968 Assert( pfnFunction
12969 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
12970 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12971 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12972 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12973 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12974 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
12975 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
12976 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
12977 : UINT64_C(0xc000b000a0009000) ));
12978 break;
12979 case 2:
12980 Assert( pfnFunction
12981 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
12982 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12983 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12984 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12985 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
12986 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
12987 : UINT64_C(0xc000b000a0009000) ));
12988 break;
12989 case 4:
12990 Assert( pfnFunction
12991 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
12992 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
12993 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
12994 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
12995 : UINT64_C(0xc000b000a0009000) ));
12996 break;
12997 case 8:
12998 Assert( pfnFunction
12999 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
13000 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
13001 : UINT64_C(0xc000b000a0009000) ));
13002 break;
13003 }
13004 }
13005 else
13006 {
13007 Assert(iSegReg < 6);
13008 switch (cbMem)
13009 {
13010 case 1:
13011 Assert( pfnFunction
13012 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
13013 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
13014 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13015 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13016 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13017 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
13018 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
13019 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
13020 : UINT64_C(0xc000b000a0009000) ));
13021 break;
13022 case 2:
13023 Assert( pfnFunction
13024 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
13025 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
13026 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
13027 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
13028 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
13029 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
13030 : UINT64_C(0xc000b000a0009000) ));
13031 break;
13032 case 4:
13033 Assert( pfnFunction
13034 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
13035 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
13036 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
13037 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
13038 : UINT64_C(0xc000b000a0009000) ));
13039 break;
13040 case 8:
13041 Assert( pfnFunction
13042 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
13043 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
13044 : UINT64_C(0xc000b000a0009000) ));
13045 break;
13046 }
13047 }
13048#endif
13049
13050#ifdef VBOX_STRICT
13051 /*
13052 * Check that the fExec flags we've got make sense.
13053 */
13054 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13055#endif
13056
13057 /*
13058 * To keep things simple we have to commit any pending writes first as we
13059 * may end up making calls.
13060 */
13061 /** @todo we could postpone this till we make the call and reload the
13062 * registers after returning from the call. Not sure if that's sensible or
13063 * not, though. */
13064#ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13065 off = iemNativeRegFlushPendingWrites(pReNative, off);
13066#else
13067 /* The program counter is treated differently for now. */
13068 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
13069#endif
13070
13071#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13072 /*
13073 * Move/spill/flush stuff out of call-volatile registers.
13074 * This is the easy way out. We could contain this to the tlb-miss branch
13075 * by saving and restoring active stuff here.
13076 */
13077 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13078#endif
13079
13080 /*
13081 * Define labels and allocate the result register (trying for the return
13082 * register if we can).
13083 */
13084 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13085 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
13086 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13087 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
13088 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
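    /* Preferring IEMNATIVE_CALL_RET_GREG for fetches means the helper return value on
       the TlbMiss path below can stay put instead of requiring an extra register move. */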
13089 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
13090 uint8_t const idxRegValueStore = !TlbState.fSkip
13091 && enmOp == kIemNativeEmitMemOp_Store
13092 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13093 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
13094 : UINT8_MAX;
13095 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13096 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13097 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13098 : UINT32_MAX;
13099
13100 /*
13101 * Jump to the TLB lookup code.
13102 */
13103 if (!TlbState.fSkip)
13104 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13105
13106 /*
13107 * TlbMiss:
13108 *
13109 * Call helper to do the fetching.
13110 * We flush all guest register shadow copies here.
13111 */
13112 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13113
13114#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13115 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13116#else
13117 RT_NOREF(idxInstr);
13118#endif
13119
13120#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13121 if (pReNative->Core.offPc)
13122 {
13123 /*
13124 * Update the program counter but restore it at the end of the TlbMiss branch.
13125 * This should allow delaying more program counter updates for the TlbLookup and hit paths
13126         * which are hopefully much more frequent, reducing the number of memory accesses.
13127 */
13128 /* Allocate a temporary PC register. */
13129 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13130
13131 /* Perform the addition and store the result. */
13132 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13133 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13134
13135 /* Free and flush the PC register. */
13136 iemNativeRegFreeTmp(pReNative, idxPcReg);
13137 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13138 }
13139#endif
13140
13141#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13142 /* Save variables in volatile registers. */
13143 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13144 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
13145 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
13146 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13147#endif
13148
13149 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
13150 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
13151 if (enmOp == kIemNativeEmitMemOp_Store)
13152 {
13153 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
13154 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
13155#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13156 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13157#else
13158 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13159 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
13160#endif
13161 }
13162
13163 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
13164 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
13165#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13166 fVolGregMask);
13167#else
13168 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
13169#endif
13170
13171 if (iSegReg != UINT8_MAX)
13172 {
13173 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
13174 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13175 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
13176 }
13177
13178 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13179 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
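    /* So, in effect, the call made below is (illustrative):
           flat: pfnFunction(pVCpu, GCPtrMem[, uValue]);
       segmented: pfnFunction(pVCpu, GCPtrMem, iSegReg[, uValue]); */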
13180
13181 /* Done setting up parameters, make the call. */
13182 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13183
13184 /*
13185 * Put the result in the right register if this is a fetch.
13186 */
13187 if (enmOp != kIemNativeEmitMemOp_Store)
13188 {
13189 Assert(idxRegValueFetch == pVarValue->idxReg);
13190 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
13191 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
13192 }
13193
13194#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13195 /* Restore variables and guest shadow registers to volatile registers. */
13196 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13197 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13198#endif
13199
13200#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13201 if (pReNative->Core.offPc)
13202 {
13203 /*
13204 * Time to restore the program counter to its original value.
13205 */
13206 /* Allocate a temporary PC register. */
13207 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13208
13209 /* Restore the original value. */
13210 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13211 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13212
13213 /* Free and flush the PC register. */
13214 iemNativeRegFreeTmp(pReNative, idxPcReg);
13215 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13216 }
13217#endif
13218
13219#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13220 if (!TlbState.fSkip)
13221 {
13222 /* end of TlbMiss - Jump to the done label. */
13223 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13224 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13225
13226 /*
13227 * TlbLookup:
13228 */
13229 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
13230 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
13231 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
13232
13233 /*
13234 * Emit code to do the actual storing / fetching.
13235 */
13236 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13237# ifdef VBOX_WITH_STATISTICS
13238        off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13239                                                  enmOp == kIemNativeEmitMemOp_Store
13240                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
13241                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
13242# endif
13243 switch (enmOp)
13244 {
13245 case kIemNativeEmitMemOp_Store:
13246 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
13247 {
13248 switch (cbMem)
13249 {
13250 case 1:
13251 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13252 break;
13253 case 2:
13254 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13255 break;
13256 case 4:
13257 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13258 break;
13259 case 8:
13260 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13261 break;
13262 default:
13263 AssertFailed();
13264 }
13265 }
13266 else
13267 {
13268 switch (cbMem)
13269 {
13270 case 1:
13271 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
13272 idxRegMemResult, TlbState.idxReg1);
13273 break;
13274 case 2:
13275 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
13276 idxRegMemResult, TlbState.idxReg1);
13277 break;
13278 case 4:
13279 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
13280 idxRegMemResult, TlbState.idxReg1);
13281 break;
13282 case 8:
13283 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
13284 idxRegMemResult, TlbState.idxReg1);
13285 break;
13286 default:
13287 AssertFailed();
13288 }
13289 }
13290 break;
13291
13292 case kIemNativeEmitMemOp_Fetch:
13293 case kIemNativeEmitMemOp_Fetch_Zx_U16:
13294 case kIemNativeEmitMemOp_Fetch_Zx_U32:
13295 case kIemNativeEmitMemOp_Fetch_Zx_U64:
13296 switch (cbMem)
13297 {
13298 case 1:
13299 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13300 break;
13301 case 2:
13302 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13303 break;
13304 case 4:
13305 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13306 break;
13307 case 8:
13308 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13309 break;
13310 default:
13311 AssertFailed();
13312 }
13313 break;
13314
13315 case kIemNativeEmitMemOp_Fetch_Sx_U16:
13316 Assert(cbMem == 1);
13317 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13318 break;
13319
13320 case kIemNativeEmitMemOp_Fetch_Sx_U32:
13321 Assert(cbMem == 1 || cbMem == 2);
13322 if (cbMem == 1)
13323 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13324 else
13325 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13326 break;
13327
13328 case kIemNativeEmitMemOp_Fetch_Sx_U64:
13329 switch (cbMem)
13330 {
13331 case 1:
13332 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13333 break;
13334 case 2:
13335 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13336 break;
13337 case 4:
13338 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13339 break;
13340 default:
13341 AssertFailed();
13342 }
13343 break;
13344
13345 default:
13346 AssertFailed();
13347 }
13348
13349 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13350
13351 /*
13352 * TlbDone:
13353 */
13354 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13355
13356 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13357
13358# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13359 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13360 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13361# endif
13362 }
13363#else
13364 RT_NOREF(fAlignMask, idxLabelTlbMiss);
13365#endif
13366
13367 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
13368 iemNativeVarRegisterRelease(pReNative, idxVarValue);
13369 return off;
13370}
13371
13372
13373
13374/*********************************************************************************************************************************
13375* Memory fetches (IEM_MEM_FETCH_XXX). *
13376*********************************************************************************************************************************/
13377
13378/* 8-bit segmented: */
13379#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
13380 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
13381 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13382 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13383
13384#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13385 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13386 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13387 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13388
13389#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13390 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13391 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13392 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13393
13394#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13395 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13396 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13397 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13398
13399#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13400 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13401 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13402 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13403
13404#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13405 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13406 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13407 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13408
13409#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13410 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13411 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13412 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13413
13414/* 16-bit segmented: */
13415#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13416 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13417 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13418 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13419
13420#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13421 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13422 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13423 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13424
13425#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13426 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13427 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13428 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13429
13430#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13431 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13432 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13433 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13434
13435#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13436 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13437 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13438 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13439
13440#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13441 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13442 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13443 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13444
13445
13446/* 32-bit segmented: */
13447#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13448 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13449 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13450 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13451
13452#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13453 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13454 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13455 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13456
13457#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13458 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13459 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13460 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13461
13462#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13463 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13464 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13465 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13466
13467
13468/* 64-bit segmented: */
13469#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13470 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13471 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13472 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
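
/* Illustration (a sketch by way of plain macro expansion; u16Val and GCPtrEff are
   placeholder names, not identifiers from this file): the fetch variants above only
   differ in element size, alignment mask (0 for bytes, cbMem - 1 otherwise), the
   zero-/sign-extending kIemNativeEmitMemOp_Fetch_Zx_Uxx / kIemNativeEmitMemOp_Fetch_Sx_Uxx
   operation and the TLB-miss helper; the _DISP variants just forward an extra
   displacement.  For instance IEM_MC_FETCH_MEM_U16_DISP(u16Val, X86_SREG_DS, GCPtrEff, 2)
   becomes:

       off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, u16Val, X86_SREG_DS, GCPtrEff,
                                                  sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch,
                                                  (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, 2);
*/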
13473
13474
13475
13476/* 8-bit flat: */
13477#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
13478 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
13479 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13480 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13481
13482#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
13483 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13484 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13485 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13486
13487#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
13488 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13489 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13490 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13491
13492#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
13493 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13494 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13495 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13496
13497#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
13498 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13499 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13500 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13501
13502#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
13503 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13504 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13505 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13506
13507#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
13508 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13509 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13510 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13511
13512
13513/* 16-bit flat: */
13514#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
13515 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13516 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13517 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13518
13519#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
13520 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13521 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13522 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13523
13524#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
13525 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13526 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13527 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13528
13529#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
13530 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13531 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13532 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13533
13534#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
13535 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13536 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13537 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13538
13539#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
13540 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13541 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13542 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13543
13544/* 32-bit flat: */
13545#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
13546 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13547 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13548 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13549
13550#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
13551 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13552 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13553 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13554
13555#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
13556 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13557 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13558 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13559
13560#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
13561 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13562 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13563 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13564
13565/* 64-bit flat: */
13566#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
13567 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13568 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13569 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
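
/* A quick contrast with the segmented variants (sketch): the FLAT macros pass
   UINT8_MAX as the segment index, which the common emitter takes to mean "no
   segment", and they use the iemNativeHlpMemFlatFetchDataUxx helpers on the
   TLB-miss path; everything else (sizes, alignment masks, extension modes)
   matches the segmented table above one to one. */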
13570
13571
13572
13573/*********************************************************************************************************************************
13574* Memory stores (IEM_MEM_STORE_XXX). *
13575*********************************************************************************************************************************/
13576
13577#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
13578 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
13579 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13580 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13581
13582#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
13583 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
13584 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13585 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13586
13587#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
13588 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
13589 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13590 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13591
13592#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
13593 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
13594 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13595 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13596
13597
13598#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
13599 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
13600 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13601 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13602
13603#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
13604 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
13605 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13606 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13607
13608#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
13609 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
13610 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13611 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13612
13613#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
13614 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
13615 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13616 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13617
13618
13619#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
13620 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13621 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13622
13623#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
13624 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13625 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13626
13627#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
13628 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13629 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13630
13631#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
13632 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13633 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13634
13635
13636#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
13637 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13638 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13639
13640#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
13641 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13642 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13643
13644#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
13645 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13646 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13647
13648#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
13649 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13650 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13651
13652/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
13653 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
13654DECL_INLINE_THROW(uint32_t)
13655iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
13656 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
13657{
13658 /*
13659 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
13660 * to do the grunt work.
13661 */
13662 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
13663 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
13664 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
13665 pfnFunction, idxInstr);
13666 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
13667 return off;
13668}
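
/* A minimal usage sketch (the concrete values are made up for illustration):
   IEM_MC_STORE_MEM_U16_CONST(X86_SREG_ES, GCPtrEff, 0xffff) ends up here with
   uValueConst = 0xffff and cbMem = sizeof(uint16_t); the constant is wrapped in a
   temporary variable via iemNativeVarAllocConst() so the generic fetch/store
   emitter can treat it like any other value variable, and that temporary is freed
   again right after the store has been emitted. */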
13669
13670
13671
13672/*********************************************************************************************************************************
13673* Stack Accesses. *
13674*********************************************************************************************************************************/
13675/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
13676#define IEM_MC_PUSH_U16(a_u16Value) \
13677 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
13678 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
13679#define IEM_MC_PUSH_U32(a_u32Value) \
13680 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
13681 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
13682#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
13683 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
13684 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
13685#define IEM_MC_PUSH_U64(a_u64Value) \
13686 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
13687 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
13688
13689#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
13690 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
13691 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13692#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
13693 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
13694 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
13695#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
13696 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
13697 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
13698
13699#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
13700 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
13701 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13702#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
13703 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
13704 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
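
/* How the packed cBitsVarAndFlat argument is consumed (sketch): for the flat
   32-bit segment register push above, RT_MAKE_U32_FROM_U8(32, 32, 1, 0) places the
   value width (32 bits) in byte 0, the flat stack width (32 bits) in byte 1 and the
   fSReg flag in byte 2; iemNativeEmitStackPush() below unpacks these again with
   RT_BYTE1/RT_BYTE2/RT_BYTE3 to get cbMem, cBitsFlat and fIsSegReg. */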
13705
13706
13707DECL_FORCE_INLINE_THROW(uint32_t)
13708iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13709{
13710 /* Use16BitSp: */
13711#ifdef RT_ARCH_AMD64
13712 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
13713 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13714#else
13715 /* sub regeff, regrsp, #cbMem */
13716 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
13717 /* and regeff, regeff, #0xffff */
13718 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
13719 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
13720    /* bfi regrsp, regeff, #0, #16 - copies bits 15:0 of idxRegEffSp into bits 15:0 of idxRegRsp, leaving the rest as is. */
13721 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
13722#endif
13723 return off;
13724}
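
/* Roughly what the helper above emits, expressed as C (sketch only):
       uEffSp = (uRsp - cbMem) & UINT16_C(0xffff);
       uRsp   = (uRsp & ~(uint64_t)UINT16_C(0xffff)) | uEffSp;
   i.e. only SP (bits 15:0 of RSP) is decremented and the result doubles as the
   effective address for the store. */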
13725
13726
13727DECL_FORCE_INLINE(uint32_t)
13728iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13729{
13730 /* Use32BitSp: */
13731 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
13732 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13733 return off;
13734}
13735
13736
13737/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
13738DECL_INLINE_THROW(uint32_t)
13739iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
13740 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
13741{
13742 /*
13743 * Assert sanity.
13744 */
13745 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
13746 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
13747#ifdef VBOX_STRICT
13748 if (RT_BYTE2(cBitsVarAndFlat) != 0)
13749 {
13750 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13751 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13752 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13753 Assert( pfnFunction
13754 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13755 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
13756 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
13757 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13758 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
13759 : UINT64_C(0xc000b000a0009000) ));
13760 }
13761 else
13762 Assert( pfnFunction
13763 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
13764 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
13765 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
13766 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
13767 : UINT64_C(0xc000b000a0009000) ));
13768#endif
13769
13770#ifdef VBOX_STRICT
13771 /*
13772 * Check that the fExec flags we've got make sense.
13773 */
13774 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13775#endif
13776
13777 /*
13778 * To keep things simple we have to commit any pending writes first as we
13779 * may end up making calls.
13780 */
13781 /** @todo we could postpone this till we make the call and reload the
13782 * registers after returning from the call. Not sure if that's sensible or
13783 * not, though. */
13784 off = iemNativeRegFlushPendingWrites(pReNative, off);
13785
13786 /*
13787 * First we calculate the new RSP and the effective stack pointer value.
13788 * For 64-bit mode and flat 32-bit these two are the same.
13789 * (Code structure is very similar to that of PUSH)
13790 */
13791 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
13792 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
13793 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
13794 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
13795 ? cbMem : sizeof(uint16_t);
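    /* Example of the quirk cbMemAccess covers (sketch): for a 32-bit PUSH FS on an
       Intel guest outside 16-bit code only the low 16 bits are actually written
       (cbMemAccess = 2) even though RSP is still lowered by cbMem = 4; in 16-bit
       code all 32 bits are accessed and the upper half gets filled in from EFLAGS
       by the TlbLookup store code further down. */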
13796 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
13797 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
13798 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
13799 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
13800 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
13801 if (cBitsFlat != 0)
13802 {
13803 Assert(idxRegEffSp == idxRegRsp);
13804 Assert(cBitsFlat == 32 || cBitsFlat == 64);
13805 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
13806 if (cBitsFlat == 64)
13807 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
13808 else
13809 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
13810 }
13811 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
13812 {
13813 Assert(idxRegEffSp != idxRegRsp);
13814 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
13815 kIemNativeGstRegUse_ReadOnly);
13816#ifdef RT_ARCH_AMD64
13817 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13818#else
13819 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13820#endif
13821 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
13822 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
13823 offFixupJumpToUseOtherBitSp = off;
13824 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13825 {
13826 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
13827 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13828 }
13829 else
13830 {
13831 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
13832 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13833 }
13834 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13835 }
13836 /* SpUpdateEnd: */
13837 uint32_t const offLabelSpUpdateEnd = off;
13838
13839 /*
13840     * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
13841     * straight to TlbMiss if we're skipping the lookup).
13842 */
13843 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
13844 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
13845 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13846 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
13847 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13848 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13849 : UINT32_MAX;
13850 uint8_t const idxRegValue = !TlbState.fSkip
13851 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13852 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
13853 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
13854 : UINT8_MAX;
13855 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13856
13857
13858 if (!TlbState.fSkip)
13859 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13860 else
13861 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
13862
13863 /*
13864 * Use16BitSp:
13865 */
13866 if (cBitsFlat == 0)
13867 {
13868#ifdef RT_ARCH_AMD64
13869 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13870#else
13871 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13872#endif
13873 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
13874 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13875 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13876 else
13877 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13878 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
13879 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13880 }
13881
13882 /*
13883 * TlbMiss:
13884 *
13885 * Call helper to do the pushing.
13886 */
13887 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
13888
13889#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13890 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13891#else
13892 RT_NOREF(idxInstr);
13893#endif
13894
13895 /* Save variables in volatile registers. */
13896 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13897 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
13898 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
13899 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
13900 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13901
13902 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
13903 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
13904 {
13905 /* Swap them using ARG0 as temp register: */
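        /* (A pairwise exchange would clobber one of the two values, hence the
           three-move rotation through ARG0, which is only loaded with pVCpu later.) */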
13906 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
13907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
13908 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
13909 }
13910 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
13911 {
13912 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
13913 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
13914 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13915
13916 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
13917 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
13918 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13919 }
13920 else
13921 {
13922 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
13923 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13924
13925 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
13926 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
13927 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
13928 }
13929
13930 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13931 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13932
13933 /* Done setting up parameters, make the call. */
13934 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13935
13936 /* Restore variables and guest shadow registers to volatile registers. */
13937 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13938 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13939
13940#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13941 if (!TlbState.fSkip)
13942 {
13943 /* end of TlbMiss - Jump to the done label. */
13944 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13945 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13946
13947 /*
13948 * TlbLookup:
13949 */
13950 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
13951 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13952
13953 /*
13954 * Emit code to do the actual storing / fetching.
13955 */
13956 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13957# ifdef VBOX_WITH_STATISTICS
13958 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13959 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
13960# endif
13961 if (idxRegValue != UINT8_MAX)
13962 {
13963 switch (cbMemAccess)
13964 {
13965 case 2:
13966 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13967 break;
13968 case 4:
13969 if (!fIsIntelSeg)
13970 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13971 else
13972 {
13973                        /* Intel real mode segment push: the 10980XE adds the 2nd half of EFLAGS to a
13974                           PUSH FS in real mode, so we have to try to emulate that here.
13975 We borrow the now unused idxReg1 from the TLB lookup code here. */
13976 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
13977 kIemNativeGstReg_EFlags);
13978 if (idxRegEfl != UINT8_MAX)
13979 {
13980#ifdef RT_ARCH_AMD64
13981 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
13982 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
13983 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13984#else
13985 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
13986 off, TlbState.idxReg1, idxRegEfl,
13987 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13988#endif
13989 iemNativeRegFreeTmp(pReNative, idxRegEfl);
13990 }
13991 else
13992 {
13993 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
13994 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
13995 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
13996 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13997 }
13998 /* ASSUMES the upper half of idxRegValue is ZERO. */
13999 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
14000 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
14001 }
14002 break;
14003 case 8:
14004 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
14005 break;
14006 default:
14007 AssertFailed();
14008 }
14009 }
14010 else
14011 {
14012 switch (cbMemAccess)
14013 {
14014 case 2:
14015 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
14016 idxRegMemResult, TlbState.idxReg1);
14017 break;
14018 case 4:
14019 Assert(!fIsSegReg);
14020 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
14021 idxRegMemResult, TlbState.idxReg1);
14022 break;
14023 case 8:
14024 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
14025 break;
14026 default:
14027 AssertFailed();
14028 }
14029 }
14030
14031 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14032 TlbState.freeRegsAndReleaseVars(pReNative);
14033
14034 /*
14035 * TlbDone:
14036 *
14037 * Commit the new RSP value.
14038 */
14039 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14040 }
14041#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14042
14043 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
14044 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14045 if (idxRegEffSp != idxRegRsp)
14046 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14047
14048    /* The value variable is implicitly flushed. */
14049 if (idxRegValue != UINT8_MAX)
14050 iemNativeVarRegisterRelease(pReNative, idxVarValue);
14051 iemNativeVarFreeLocal(pReNative, idxVarValue);
14052
14053 return off;
14054}
14055
14056
14057
14058/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
14059#define IEM_MC_POP_GREG_U16(a_iGReg) \
14060 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
14061 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
14062#define IEM_MC_POP_GREG_U32(a_iGReg) \
14063 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
14064 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
14065#define IEM_MC_POP_GREG_U64(a_iGReg) \
14066 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
14067 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
14068
14069#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
14070 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
14071 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14072#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
14073 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
14074 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
14075
14076#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
14077 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
14078 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14079#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
14080 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
14081 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
14082
14083
14084DECL_FORCE_INLINE_THROW(uint32_t)
14085iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
14086 uint8_t idxRegTmp)
14087{
14088 /* Use16BitSp: */
14089#ifdef RT_ARCH_AMD64
14090 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14091 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
14092 RT_NOREF(idxRegTmp);
14093#else
14094 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
14095 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
14096 /* add tmp, regrsp, #cbMem */
14097 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
14098 /* and tmp, tmp, #0xffff */
14099 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
14100 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
14101    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
14102 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
14103#endif
14104 return off;
14105}
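
/* Roughly what the helper above emits, expressed as C (sketch only):
       uEffSp = uRsp & UINT16_C(0xffff);
       uRsp   = (uRsp & ~(uint64_t)UINT16_C(0xffff)) | ((uEffSp + cbMem) & UINT16_C(0xffff));
   i.e. the old SP is the address to read from and only bits 15:0 of RSP are
   incremented afterwards. */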
14106
14107
14108DECL_FORCE_INLINE(uint32_t)
14109iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
14110{
14111 /* Use32BitSp: */
14112 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14113 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
14114 return off;
14115}
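
/* Roughly equivalent C (sketch only): uEffSp = (uint32_t)uRsp; uRsp = (uint32_t)uRsp + cbMem;
   the 32-bit operations zero bits 63:32 of the RSP value, which is fine because a
   32-bit stack pointer is only used outside 64-bit mode. */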
14116
14117
14118/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
14119DECL_INLINE_THROW(uint32_t)
14120iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
14121 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
14122{
14123 /*
14124 * Assert sanity.
14125 */
14126 Assert(idxGReg < 16);
14127#ifdef VBOX_STRICT
14128 if (RT_BYTE2(cBitsVarAndFlat) != 0)
14129 {
14130 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14131 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14132 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14133 Assert( pfnFunction
14134 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14135 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
14136 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14137 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
14138 : UINT64_C(0xc000b000a0009000) ));
14139 }
14140 else
14141 Assert( pfnFunction
14142 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
14143 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
14144 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
14145 : UINT64_C(0xc000b000a0009000) ));
14146#endif
14147
14148#ifdef VBOX_STRICT
14149 /*
14150 * Check that the fExec flags we've got make sense.
14151 */
14152 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14153#endif
14154
14155 /*
14156 * To keep things simple we have to commit any pending writes first as we
14157 * may end up making calls.
14158 */
14159 off = iemNativeRegFlushPendingWrites(pReNative, off);
14160
14161 /*
14162     * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
14163 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
14164 * directly as the effective stack pointer.
14165 * (Code structure is very similar to that of PUSH)
14166 */
14167 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
14168 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
14169 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
14170 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
14171 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
14172 /** @todo can do a better job picking the register here. For cbMem >= 4 this
14173 * will be the resulting register value. */
14174 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
14175
14176 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
14177 if (cBitsFlat != 0)
14178 {
14179 Assert(idxRegEffSp == idxRegRsp);
14180 Assert(cBitsFlat == 32 || cBitsFlat == 64);
14181 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
14182 }
14183 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
14184 {
14185 Assert(idxRegEffSp != idxRegRsp);
14186 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
14187 kIemNativeGstRegUse_ReadOnly);
14188#ifdef RT_ARCH_AMD64
14189 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14190#else
14191 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14192#endif
14193 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
14194 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
14195 offFixupJumpToUseOtherBitSp = off;
14196 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14197 {
14198/** @todo can skip idxRegRsp updating when popping ESP. */
14199 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
14200 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14201 }
14202 else
14203 {
14204 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
14205 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14206 }
14207 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14208 }
14209 /* SpUpdateEnd: */
14210 uint32_t const offLabelSpUpdateEnd = off;
14211
14212 /*
14213     * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
14214     * straight to TlbMiss if we're skipping the lookup).
14215 */
14216 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
14217 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
14218 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14219 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
14220 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14221 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14222 : UINT32_MAX;
14223
14224 if (!TlbState.fSkip)
14225 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14226 else
14227 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
14228
14229 /*
14230 * Use16BitSp:
14231 */
14232 if (cBitsFlat == 0)
14233 {
14234#ifdef RT_ARCH_AMD64
14235 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14236#else
14237 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14238#endif
14239 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
14240 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14241 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14242 else
14243 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14244 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
14245 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14246 }
14247
14248 /*
14249 * TlbMiss:
14250 *
14251     * Call helper to do the popping.
14252 */
14253 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
14254
14255#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14256 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14257#else
14258 RT_NOREF(idxInstr);
14259#endif
14260
14261 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
14262 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
14263 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
14264 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14265
14266
14267 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
14268 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
14269 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
14270
14271 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14272 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14273
14274 /* Done setting up parameters, make the call. */
14275 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14276
14277 /* Move the return register content to idxRegMemResult. */
14278 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14279 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14280
14281 /* Restore variables and guest shadow registers to volatile registers. */
14282 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14283 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14284
14285#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14286 if (!TlbState.fSkip)
14287 {
14288 /* end of TlbMiss - Jump to the done label. */
14289 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14290 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14291
14292 /*
14293 * TlbLookup:
14294 */
14295 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
14296 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14297
14298 /*
14299     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
14300 */
14301 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14302# ifdef VBOX_WITH_STATISTICS
14303 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
14304 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
14305# endif
14306 switch (cbMem)
14307 {
14308 case 2:
14309 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14310 break;
14311 case 4:
14312 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14313 break;
14314 case 8:
14315 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14316 break;
14317 default:
14318 AssertFailed();
14319 }
14320
14321 TlbState.freeRegsAndReleaseVars(pReNative);
14322
14323 /*
14324 * TlbDone:
14325 *
14326     * Set the new RSP value (FLAT accesses need to calculate it first) and
14327 * commit the popped register value.
14328 */
14329 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14330 }
14331#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14332
14333 if (idxGReg != X86_GREG_xSP)
14334 {
14335 /* Set the register. */
14336 if (cbMem >= sizeof(uint32_t))
14337 {
14338#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14339 AssertMsg( pReNative->idxCurCall == 0
14340 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
14341 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
14342#endif
14343 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
14344 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
14345 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14346 }
14347 else
14348 {
14349 Assert(cbMem == sizeof(uint16_t));
14350 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
14351 kIemNativeGstRegUse_ForUpdate);
14352 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
14353 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14354 iemNativeRegFreeTmp(pReNative, idxRegDst);
14355 }
14356
14357 /* Complete RSP calculation for FLAT mode. */
14358 if (idxRegEffSp == idxRegRsp)
14359 {
14360 if (cBitsFlat == 64)
14361 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14362 else
14363 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14364 }
14365 }
14366 else
14367 {
14368        /* We're popping RSP, ESP or SP. Only the 16-bit (SP) case requires a bit of extra work, of course. */
14369 if (cbMem == sizeof(uint64_t))
14370 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
14371 else if (cbMem == sizeof(uint32_t))
14372 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
14373 else
14374 {
14375 if (idxRegEffSp == idxRegRsp)
14376 {
14377 if (cBitsFlat == 64)
14378 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14379 else
14380 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14381 }
14382 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
14383 }
14384 }
14385 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
14386
14387 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14388 if (idxRegEffSp != idxRegRsp)
14389 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14390 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14391
14392 return off;
14393}
14394
14395
14396
14397/*********************************************************************************************************************************
14398* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
14399*********************************************************************************************************************************/
14400
14401#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14402 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14403 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14404 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
14405
14406#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14407 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14408 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14409 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
14410
14411#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14412 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14413 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14414 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
14415
14416#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14417 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14418 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14419 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
14420
14421
14422#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14423 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14424 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14425 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
14426
14427#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14428 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14429 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14430 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
14431
14432#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14433 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14434 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14435 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14436
14437#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14438 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14439 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14440 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
14441
14442#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14443 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
14444 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14445 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
14446
14447
14448#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14449 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14450 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14451 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
14452
14453#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14454 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14455 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14456 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
14457
14458#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14459 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14460 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14461 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14462
14463#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14464 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14465 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14466 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
14467
14468#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14469 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
14470 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14471 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
14472
14473
14474#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14475 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14476 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14477 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
14478
14479#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14480 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14481 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14482 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
14483#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14484 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14485 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14486 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14487
14488#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14489 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14490 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14491 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
14492
14493#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14494 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
14495 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14496 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
14497
14498
14499#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14500 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14501 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14502 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
14503
14504#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14505 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14506 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14507 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
14508
14509
14510#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14511 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14512 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14513 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
14514
14515#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14516 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14517 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14518 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
14519
14520#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14521 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14522 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14523 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
14524
14525#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14526 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14527 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14528 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
14529
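
/* Usage sketch (hedged; the commit/unmap side is not part of this section): each
   MAP macro emits code that produces a host pointer to the guest bytes plus an
   unmap token in a_bUnmapInfo, which the matching IEM_MC_MEM_COMMIT_AND_UNMAP
   statement is expected to hand back to the unmap helper later on.  The access
   mode selects both the IEM_ACCESS_DATA flag and the helper suffix - ATOMIC, RW,
   WO and RO map to ATOMIC, RW, W and R respectively - and the alignment mask is
   again cbMem - 1 (0 for byte accesses). */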
14530
14531
14532#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14533 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14534 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14535 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
14536
14537#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14538 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14539 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14540 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
14541
14542#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14543 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14544 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14545 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
14546
14547#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14548 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14549 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14550 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
14551
14552
14553#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14554 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14555 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14556 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
14557
14558#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14559 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14560 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14561 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
14562
14563#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14564 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14565 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14566 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14567
14568#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14569 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14570 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14571 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
14572
14573#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
14574 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
14575 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14576 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14577
14578
14579#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14580 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14581 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14582 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
14583
14584#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14585 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14586 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14587 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
14588
14589#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14590 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14591 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14592 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14593
14594#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14595 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14596 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14597 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
14598
14599#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
14600 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
14601 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14602 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14603
14604
14605#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14606 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14607 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14608 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
14609
14610#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14611 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14612 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14613 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
14614
14615#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14616 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14617 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14618 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14619
14620#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14621 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14622 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14623 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
14624
14625#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
14626 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
14627 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14628 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14629
14630
14631#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
14632 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14633 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14634 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
14635
14636#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
14637 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14638 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14639 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
14640
14641
14642#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14643 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14644 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14645 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
14646
14647#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14648 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14649 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14650 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
14651
14652#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14653 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14654 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14655 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
14656
14657#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14658 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14659 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14660 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
14661
14662
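/**
 * Common emitter worker for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 * statements above.
 *
 * Emits a TLB lookup for the fast path and a call to the given helper for the
 * TLB miss path; the mapped host pointer ends up in the a_pXxxMem variable and
 * the unmap info in a_bUnmapInfo.
 *
 * Example (illustrative argument names): IEM_MC_MEM_MAP_U64_RW(pu64Dst,
 * bUnmapInfo, iEffSeg, GCPtrEff) expands per the macro above into a call here
 * with cbMem=sizeof(uint64_t), fAccess=IEM_ACCESS_DATA_RW and
 * pfnFunction=iemNativeHlpMemMapDataU64Rw.
 *
 * @returns New code buffer offset; longjmps on failure.
 * @param   pReNative       The native recompiler state.
 * @param   off             Current code buffer offset.
 * @param   idxVarMem       Variable receiving the host pointer to the mapping.
 * @param   idxVarUnmapInfo Variable receiving the unmap info (bUnmapInfo).
 * @param   iSegReg         The segment register, UINT8_MAX for flat addressing.
 * @param   idxVarGCPtrMem  Variable holding the guest address.
 * @param   cbMem           The size of the access.
 * @param   fAccess         IEM_ACCESS_XXX of the access.
 * @param   fAlignMask      Alignment mask for the access.
 * @param   pfnFunction     The TLB miss helper to call.
 * @param   idxInstr        The current instruction number (for diagnostics).
 */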
14663DECL_INLINE_THROW(uint32_t)
14664iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
14665 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
14666 uintptr_t pfnFunction, uint8_t idxInstr)
14667{
14668 /*
14669 * Assert sanity.
14670 */
14671 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
14672 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
14673 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
14674 && pVarMem->cbVar == sizeof(void *),
14675 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14676
14677 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14679 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
14680 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
14681 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14682
14683 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
14684 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
14685 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
14686 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
14687 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14688
14689 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
14690
14691 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
14692
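/* In strict builds, double-check that the helper passed in matches the access
   type, atomicity and size implied by the other parameters. */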
14693#ifdef VBOX_STRICT
14694# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
14695 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
14696 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
14697 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
14698 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
14699# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
14700 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
14701 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
14702 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
14703
14704 if (iSegReg == UINT8_MAX)
14705 {
14706 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14707 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14708 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14709 switch (cbMem)
14710 {
14711 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
14712 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
14713 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
14714 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
14715 case 10:
14716 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
14717 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
14718 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14719 break;
14720 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
14721# if 0
14722 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
14723 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
14724# endif
14725 default: AssertFailed(); break;
14726 }
14727 }
14728 else
14729 {
14730 Assert(iSegReg < 6);
14731 switch (cbMem)
14732 {
14733 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
14734 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
14735 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
14736 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
14737 case 10:
14738 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
14739 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
14740 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14741 break;
14742 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
14743# if 0
14744 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
14745 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
14746# endif
14747 default: AssertFailed(); break;
14748 }
14749 }
14750# undef IEM_MAP_HLP_FN
14751# undef IEM_MAP_HLP_FN_NO_AT
14752#endif
14753
14754#ifdef VBOX_STRICT
14755 /*
14756 * Check that the fExec flags we've got make sense.
14757 */
14758 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14759#endif
14760
14761 /*
14762 * To keep things simple we have to commit any pending writes first as we
14763 * may end up making calls.
14764 */
14765 off = iemNativeRegFlushPendingWrites(pReNative, off);
14766
14767#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14768 /*
14769 * Move/spill/flush stuff out of call-volatile registers.
14770 * This is the easy way out. We could contain this to the tlb-miss branch
14771 * by saving and restoring active stuff here.
14772 */
14773 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
14774 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
14775#endif
14776
14777 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
14778 while the tlb-miss code path will temporarily put it on the stack.
14779 Set the type to stack here so we don't need to do it twice below. */
14780 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
14781 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
14782 /** @todo use a tmp register from TlbState, since they'll be free after tlb
14783 * lookup is done. */
14784
14785 /*
14786 * Define labels and allocate the result register (trying for the return
14787 * register if we can).
14788 */
14789 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14790 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
14791 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
14792 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
14793 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
14794 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14795 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14796 : UINT32_MAX;
14797//off=iemNativeEmitBrk(pReNative, off, 0);
14798 /*
14799 * Jump to the TLB lookup code.
14800 */
14801 if (!TlbState.fSkip)
14802 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14803
14804 /*
14805 * TlbMiss:
14806 *
14807 * Call helper to do the mapping.
14808 * We flush all guest register shadow copies here.
14809 */
14810 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
14811
14812#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14813 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14814#else
14815 RT_NOREF(idxInstr);
14816#endif
14817
14818#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14819 /* Save variables in volatile registers. */
14820 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
14821 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14822#endif
14823
14824 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
14825 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
14826#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14827 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
14828#else
14829 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14830#endif
14831
14832 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
14833 if (iSegReg != UINT8_MAX)
14834 {
14835 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
14836 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
14837 }
14838
14839 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
14840 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
14841 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
14842
14843 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14844 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14845
14846 /* Done setting up parameters, make the call. */
14847 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14848
14849 /*
14850 * Put the output in the right registers.
14851 */
14852 Assert(idxRegMemResult == pVarMem->idxReg);
14853 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14854 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14855
14856#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14857 /* Restore variables and guest shadow registers to volatile registers. */
14858 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14859 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14860#endif
14861
14862 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
14863 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
14864
14865#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14866 if (!TlbState.fSkip)
14867 {
14868 /* end of tlb miss - Jump to the done label. */
14869 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14870 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14871
14872 /*
14873 * TlbLookup:
14874 */
14875 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
14876 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14877# ifdef VBOX_WITH_STATISTICS
14878 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
14879 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
14880# endif
14881
14882 /* [idxVarUnmapInfo] = 0; */
14883 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
14884
14885 /*
14886 * TlbDone:
14887 */
14888 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14889
14890 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
14891
14892# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14893 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
14894 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14895# endif
14896 }
14897#else
14898 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
14899#endif
14900
14901 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
14902 iemNativeVarRegisterRelease(pReNative, idxVarMem);
14903
14904 return off;
14905}
14906
14907
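/*
 * Commit-and-unmap counterparts to the IEM_MC_MEM_MAP_XXX statements above.
 * In strict builds the emitter below asserts that the helper matches the
 * given access type.
 */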
14908#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
14909 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
14910 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
14911
14912#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
14913 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
14914 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
14915
14916#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
14917 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
14918 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
14919
14920#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
14921 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
14922 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
14923
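/**
 * Common emitter worker for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statements.
 *
 * Emits a test of the bUnmapInfo variable and only calls the given
 * commit-and-unmap helper when it is non-zero; the TLB hit path of the
 * mapping code sets it to zero, so the call is typically skipped.
 */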
14924DECL_INLINE_THROW(uint32_t)
14925iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
14926 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
14927{
14928 /*
14929 * Assert sanity.
14930 */
14931 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14932#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
14933 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14934#endif
14935 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
14936 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
14937 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
14938#ifdef VBOX_STRICT
14939 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
14940 {
14941 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
14942 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
14943 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
14944 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
14945 case IEM_ACCESS_TYPE_WRITE:
14946 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
14947 case IEM_ACCESS_TYPE_READ:
14948 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
14949 default: AssertFailed();
14950 }
14951#else
14952 RT_NOREF(fAccess);
14953#endif
14954
14955 /*
14956 * To keep things simple we have to commit any pending writes first as we
14957 * may end up making calls (there shouldn't be any at this point, so this
14958 * is just for consistency).
14959 */
14960 /** @todo we could postpone this till we make the call and reload the
14961 * registers after returning from the call. Not sure if that's sensible or
14962 * not, though. */
14963 off = iemNativeRegFlushPendingWrites(pReNative, off);
14964
14965 /*
14966 * Move/spill/flush stuff out of call-volatile registers.
14967 *
14968 * We exclude any register holding the bUnmapInfo variable, as we'll be
14969 * checking it after returning from the call and will free it afterwards.
14970 */
14971 /** @todo save+restore active registers and maybe guest shadows in miss
14972 * scenario. */
14973 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
14974
14975 /*
14976 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
14977 * to call the unmap helper function.
14978 *
14979 * The likelihood of it being zero is higher than for the TLB hit when doing
14980 * the mapping, as a TLB miss for a well-aligned and unproblematic memory
14981 * access should also end up with a mapping that won't need special unmapping.
14982 */
14983 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
14984 * should speed up things for the pure interpreter as well when TLBs
14985 * are enabled. */
14986#ifdef RT_ARCH_AMD64
14987 if (pVarUnmapInfo->idxReg == UINT8_MAX)
14988 {
14989 /* test byte [rbp - xxx], 0ffh */
14990 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
14991 pbCodeBuf[off++] = 0xf6;
14992 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
14993 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
14994 pbCodeBuf[off++] = 0xff;
14995 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14996 }
14997 else
14998#endif
14999 {
15000 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
15001 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
15002 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
15003 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
15004 }
15005 uint32_t const offJmpFixup = off;
15006 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
15007
15008 /*
15009 * Call the unmap helper function.
15010 */
15011#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
15012 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
15013#else
15014 RT_NOREF(idxInstr);
15015#endif
15016
15017 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
15018 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
15019 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
15020
15021 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
15022 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
15023
15024 /* Done setting up parameters, make the call. */
15025 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
15026
15027 /* The bUnmapInfo variable is implicitly freed by these MCs. */
15028 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
15029
15030 /*
15031 * Done, just fixup the jump for the non-call case.
15032 */
15033 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
15034
15035 return off;
15036}
15037
15038
15039
15040/*********************************************************************************************************************************
15041* State and Exceptions *
15042*********************************************************************************************************************************/
15043
15044#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15045#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15046
15047#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15048#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15049#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15050
15051#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15052#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15053#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15054
15055
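/** Common worker for the IEM_MC_ACTUALIZE/PREPARE FPU/SSE/AVX state statements
 *  above; currently a stub, see the todo inside. */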
15056DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
15057{
15058 /** @todo this needs a lot more work later. */
15059 RT_NOREF(pReNative, fForChange);
15060 return off;
15061}
15062
15063
15064
15065/*********************************************************************************************************************************
15066* Emitters for FPU related operations. *
15067*********************************************************************************************************************************/
15068
15069#define IEM_MC_FETCH_FCW(a_u16Fcw) \
15070 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
15071
15072/** Emits code for IEM_MC_FETCH_FCW. */
15073DECL_INLINE_THROW(uint32_t)
15074iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15075{
15076 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15077 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15078
15079 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15080
15081 /* Allocate a temporary FCW register. */
15082 /** @todo eliminate extra register */
15083 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
15084 kIemNativeGstRegUse_ReadOnly);
15085
15086 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
15087
15088 /* Free but don't flush the FCW register. */
15089 iemNativeRegFreeTmp(pReNative, idxFcwReg);
15090 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15091
15092 return off;
15093}
15094
15095
15096#define IEM_MC_FETCH_FSW(a_u16Fsw) \
15097 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
15098
15099/** Emits code for IEM_MC_FETCH_FSW. */
15100DECL_INLINE_THROW(uint32_t)
15101iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15102{
15103 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15104 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15105
15106 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
15107 /* Allocate a temporary FSW register. */
15108 /** @todo eliminate extra register */
15109 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
15110 kIemNativeGstRegUse_ReadOnly);
15111
15112 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
15113
15114 /* Free but don't flush the FSW register. */
15115 iemNativeRegFreeTmp(pReNative, idxFswReg);
15116 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15117
15118 return off;
15119}
15120
15121
15122
15123#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15124
15125
15126/*********************************************************************************************************************************
15127* Emitters for SSE/AVX specific operations. *
15128*********************************************************************************************************************************/
15129
15130#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
15131 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
15132
15133/** Emits code for IEM_MC_COPY_XREG_U128. */
15134DECL_INLINE_THROW(uint32_t)
15135iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
15136{
15137 /* Allocate destination and source register. */
15138 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
15139 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
15140 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
15141 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15142
15143 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
15144 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
15145 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
15146
15147 /* Free but don't flush the source and destination register. */
15148 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
15149 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15150
15151 return off;
15152}
15153
15154
15155#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
15156 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
15157
15158/** Emits code for IEM_MC_FETCH_XREG_U64. */
15159DECL_INLINE_THROW(uint32_t)
15160iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
15161{
15162 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15163 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
15164
15165 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
15166 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15167
15168 iemNativeVarSetKindToStack(pReNative, idxDstVar);
15169 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15170
15171 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
15172
15173 /* Free but don't flush the source register. */
15174 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15175 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15176
15177 return off;
15178}
15179
15180
15181#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
15182
15183
15184/*********************************************************************************************************************************
15185* The native code generator functions for each MC block. *
15186*********************************************************************************************************************************/
15187
15188/*
15189 * Include instruction emitters.
15190 */
15191#include "target-x86/IEMAllN8veEmit-x86.h"
15192
15193/*
15194 * Include g_apfnIemNativeRecompileFunctions and associated functions.
15195 *
15196 * This should probably live in its own file later, but let's see what the
15197 * compile times turn out to be first.
15198 */
15199#include "IEMNativeFunctions.cpp.h"
15200
15201
15202
15203/*********************************************************************************************************************************
15204* Recompiler Core. *
15205*********************************************************************************************************************************/
15206
15207
15208/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
15209static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
15210{
15211 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
15212 pDis->cbCachedInstr += cbMaxRead;
15213 RT_NOREF(cbMinRead);
15214 return VERR_NO_DATA;
15215}
15216
15217
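/**
 * Translates an offset into VMCPU into the name of the field at that offset,
 * for annotating disassembled native code.
 *
 * @returns Read-only field name, NULL if the offset is not a known field.
 * @param   off     The VMCPU offset.
 */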
15218DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
15219{
15220 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
15221 {
15222#define ENTRY(a_Member) { RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member }
15223 ENTRY(fLocalForcedActions),
15224 ENTRY(iem.s.rcPassUp),
15225 ENTRY(iem.s.fExec),
15226 ENTRY(iem.s.pbInstrBuf),
15227 ENTRY(iem.s.uInstrBufPc),
15228 ENTRY(iem.s.GCPhysInstrBuf),
15229 ENTRY(iem.s.cbInstrBufTotal),
15230 ENTRY(iem.s.idxTbCurInstr),
15231#ifdef VBOX_WITH_STATISTICS
15232 ENTRY(iem.s.StatNativeTlbHitsForFetch),
15233 ENTRY(iem.s.StatNativeTlbHitsForStore),
15234 ENTRY(iem.s.StatNativeTlbHitsForStack),
15235 ENTRY(iem.s.StatNativeTlbHitsForMapped),
15236 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
15237 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
15238 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
15239 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
15240#endif
15241 ENTRY(iem.s.DataTlb.aEntries),
15242 ENTRY(iem.s.DataTlb.uTlbRevision),
15243 ENTRY(iem.s.DataTlb.uTlbPhysRev),
15244 ENTRY(iem.s.DataTlb.cTlbHits),
15245 ENTRY(iem.s.CodeTlb.aEntries),
15246 ENTRY(iem.s.CodeTlb.uTlbRevision),
15247 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
15248 ENTRY(iem.s.CodeTlb.cTlbHits),
15249 ENTRY(pVMR3),
15250 ENTRY(cpum.GstCtx.rax),
15251 ENTRY(cpum.GstCtx.ah),
15252 ENTRY(cpum.GstCtx.rcx),
15253 ENTRY(cpum.GstCtx.ch),
15254 ENTRY(cpum.GstCtx.rdx),
15255 ENTRY(cpum.GstCtx.dh),
15256 ENTRY(cpum.GstCtx.rbx),
15257 ENTRY(cpum.GstCtx.bh),
15258 ENTRY(cpum.GstCtx.rsp),
15259 ENTRY(cpum.GstCtx.rbp),
15260 ENTRY(cpum.GstCtx.rsi),
15261 ENTRY(cpum.GstCtx.rdi),
15262 ENTRY(cpum.GstCtx.r8),
15263 ENTRY(cpum.GstCtx.r9),
15264 ENTRY(cpum.GstCtx.r10),
15265 ENTRY(cpum.GstCtx.r11),
15266 ENTRY(cpum.GstCtx.r12),
15267 ENTRY(cpum.GstCtx.r13),
15268 ENTRY(cpum.GstCtx.r14),
15269 ENTRY(cpum.GstCtx.r15),
15270 ENTRY(cpum.GstCtx.es.Sel),
15271 ENTRY(cpum.GstCtx.es.u64Base),
15272 ENTRY(cpum.GstCtx.es.u32Limit),
15273 ENTRY(cpum.GstCtx.es.Attr),
15274 ENTRY(cpum.GstCtx.cs.Sel),
15275 ENTRY(cpum.GstCtx.cs.u64Base),
15276 ENTRY(cpum.GstCtx.cs.u32Limit),
15277 ENTRY(cpum.GstCtx.cs.Attr),
15278 ENTRY(cpum.GstCtx.ss.Sel),
15279 ENTRY(cpum.GstCtx.ss.u64Base),
15280 ENTRY(cpum.GstCtx.ss.u32Limit),
15281 ENTRY(cpum.GstCtx.ss.Attr),
15282 ENTRY(cpum.GstCtx.ds.Sel),
15283 ENTRY(cpum.GstCtx.ds.u64Base),
15284 ENTRY(cpum.GstCtx.ds.u32Limit),
15285 ENTRY(cpum.GstCtx.ds.Attr),
15286 ENTRY(cpum.GstCtx.fs.Sel),
15287 ENTRY(cpum.GstCtx.fs.u64Base),
15288 ENTRY(cpum.GstCtx.fs.u32Limit),
15289 ENTRY(cpum.GstCtx.fs.Attr),
15290 ENTRY(cpum.GstCtx.gs.Sel),
15291 ENTRY(cpum.GstCtx.gs.u64Base),
15292 ENTRY(cpum.GstCtx.gs.u32Limit),
15293 ENTRY(cpum.GstCtx.gs.Attr),
15294 ENTRY(cpum.GstCtx.rip),
15295 ENTRY(cpum.GstCtx.eflags),
15296 ENTRY(cpum.GstCtx.uRipInhibitInt),
15297#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15298 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
15299 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
15300 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
15301 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
15302 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
15303 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
15304 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
15305 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
15306 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
15307 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
15308 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
15309 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
15310 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
15311 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
15312 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
15313 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
15314 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
15315 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
15316 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
15317 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
15318 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
15319 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
15320 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
15321 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
15322 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
15323 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
15324 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
15325 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
15326 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
15327 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
15328 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
15329 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
15330#endif
15331#undef ENTRY
15332 };
15333#ifdef VBOX_STRICT
15334 static bool s_fOrderChecked = false;
15335 if (!s_fOrderChecked)
15336 {
15337 s_fOrderChecked = true;
15338 uint32_t offPrev = s_aMembers[0].off;
15339 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
15340 {
15341 Assert(s_aMembers[i].off > offPrev);
15342 offPrev = s_aMembers[i].off;
15343 }
15344 }
15345#endif
15346
15347 /*
15348 * Binary lookup.
15349 */
15350 unsigned iStart = 0;
15351 unsigned iEnd = RT_ELEMENTS(s_aMembers);
15352 for (;;)
15353 {
15354 unsigned const iCur = iStart + (iEnd - iStart) / 2;
15355 uint32_t const offCur = s_aMembers[iCur].off;
15356 if (off < offCur)
15357 {
15358 if (iCur != iStart)
15359 iEnd = iCur;
15360 else
15361 break;
15362 }
15363 else if (off > offCur)
15364 {
15365 if (iCur + 1 < iEnd)
15366 iStart = iCur + 1;
15367 else
15368 break;
15369 }
15370 else
15371 return s_aMembers[iCur].pszName;
15372 }
15373#ifdef VBOX_WITH_STATISTICS
15374 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
15375 return "iem.s.acThreadedFuncStats[iFn]";
15376#endif
15377 return NULL;
15378}
15379
15380
15381/**
15382 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
15383 * @returns pszBuf.
15384 * @param fFlags The flags.
15385 * @param pszBuf The output buffer.
15386 * @param cbBuf The output buffer size. At least 32 bytes.
15387 */
15388DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
15389{
15390 Assert(cbBuf >= 32);
15391 static RTSTRTUPLE const s_aModes[] =
15392 {
15393 /* [00] = */ { RT_STR_TUPLE("16BIT") },
15394 /* [01] = */ { RT_STR_TUPLE("32BIT") },
15395 /* [02] = */ { RT_STR_TUPLE("!2!") },
15396 /* [03] = */ { RT_STR_TUPLE("!3!") },
15397 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
15398 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
15399 /* [06] = */ { RT_STR_TUPLE("!6!") },
15400 /* [07] = */ { RT_STR_TUPLE("!7!") },
15401 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
15402 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
15403 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
15404 /* [0b] = */ { RT_STR_TUPLE("!b!") },
15405 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
15406 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
15407 /* [0e] = */ { RT_STR_TUPLE("!e!") },
15408 /* [0f] = */ { RT_STR_TUPLE("!f!") },
15409 /* [10] = */ { RT_STR_TUPLE("!10!") },
15410 /* [11] = */ { RT_STR_TUPLE("!11!") },
15411 /* [12] = */ { RT_STR_TUPLE("!12!") },
15412 /* [13] = */ { RT_STR_TUPLE("!13!") },
15413 /* [14] = */ { RT_STR_TUPLE("!14!") },
15414 /* [15] = */ { RT_STR_TUPLE("!15!") },
15415 /* [16] = */ { RT_STR_TUPLE("!16!") },
15416 /* [17] = */ { RT_STR_TUPLE("!17!") },
15417 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
15418 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
15419 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
15420 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
15421 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
15422 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
15423 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
15424 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
15425 };
15426 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
15427 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
15428 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
15429
15430 pszBuf[off++] = ' ';
15431 pszBuf[off++] = 'C';
15432 pszBuf[off++] = 'P';
15433 pszBuf[off++] = 'L';
15434 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
15435 Assert(off < 32);
15436
15437 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
15438
15439 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
15440 {
15441 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
15442 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
15443 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
15444 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
15445 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
15446 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
15447 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
15448 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
15449 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
15450 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
15451 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
15452 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
15453 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
15454 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
15455 };
15456 if (fFlags)
15457 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
15458 if (s_aFlags[i].fFlag & fFlags)
15459 {
15460 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
15461 pszBuf[off++] = ' ';
15462 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
15463 off += s_aFlags[i].cchName;
15464 fFlags &= ~s_aFlags[i].fFlag;
15465 if (!fFlags)
15466 break;
15467 }
15468 pszBuf[off] = '\0';
15469
15470 return pszBuf;
15471}
15472
15473
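/**
 * Disassembles a native translation block, interleaving guest instructions,
 * threaded call info and labels when TB debug info is available.
 *
 * @param   pTb     The translation block; must be of the native type.
 * @param   pHlp    The output helper to print with.
 */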
15474DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
15475{
15476 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
15477#if defined(RT_ARCH_AMD64)
15478 static const char * const a_apszMarkers[] =
15479 {
15480 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
15481 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
15482 };
15483#endif
15484
15485 char szDisBuf[512];
15486 DISSTATE Dis;
15487 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
15488 uint32_t const cNative = pTb->Native.cInstructions;
15489 uint32_t offNative = 0;
15490#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15491 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
15492#endif
15493 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15494 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15495 : DISCPUMODE_64BIT;
15496#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15497 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
15498#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15499 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
15500#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15501# error "Port me"
15502#else
15503 csh hDisasm = ~(size_t)0;
15504# if defined(RT_ARCH_AMD64)
15505 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
15506# elif defined(RT_ARCH_ARM64)
15507 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
15508# else
15509# error "Port me"
15510# endif
15511 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
15512
15513 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
15514 //Assert(rcCs == CS_ERR_OK);
15515#endif
15516
15517 /*
15518 * Print TB info.
15519 */
15520 pHlp->pfnPrintf(pHlp,
15521 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
15522 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
15523 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
15524 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
15525#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15526 if (pDbgInfo && pDbgInfo->cEntries > 1)
15527 {
15528 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
15529
15530 /*
15531 * This disassembly is driven by the debug info which follows the native
15532 * code and indicates where the next guest instruction starts,
15533 * where labels are, and other such things.
15534 */
15535 uint32_t idxThreadedCall = 0;
15536 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
15537 uint8_t idxRange = UINT8_MAX;
15538 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
15539 uint32_t offRange = 0;
15540 uint32_t offOpcodes = 0;
15541 uint32_t const cbOpcodes = pTb->cbOpcodes;
15542 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
15543 uint32_t const cDbgEntries = pDbgInfo->cEntries;
15544 uint32_t iDbgEntry = 1;
15545 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
15546
15547 while (offNative < cNative)
15548 {
15549 /* If we're at or have passed the point where the next chunk of debug
15550 info starts, process it. */
15551 if (offDbgNativeNext <= offNative)
15552 {
15553 offDbgNativeNext = UINT32_MAX;
15554 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
15555 {
15556 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
15557 {
15558 case kIemTbDbgEntryType_GuestInstruction:
15559 {
15560 /* Did the exec flag change? */
15561 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
15562 {
15563 pHlp->pfnPrintf(pHlp,
15564 " fExec change %#08x -> %#08x %s\n",
15565 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15566 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15567 szDisBuf, sizeof(szDisBuf)));
15568 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
15569 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15570 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15571 : DISCPUMODE_64BIT;
15572 }
15573
15574 /* New opcode range? We need to fend off a spurious debug info entry here for cases
15575 where the compilation was aborted before the opcode was recorded and the actual
15576 instruction was translated to a threaded call. This may happen when we run out
15577 of ranges, or when some complicated interrupts/FFs are found to be pending or
15578 similar. So, we just deal with it here rather than in the compiler code as it
15579 is a lot simpler to do here. */
15580 if ( idxRange == UINT8_MAX
15581 || idxRange >= cRanges
15582 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
15583 {
15584 idxRange += 1;
15585 if (idxRange < cRanges)
15586 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
15587 else
15588 continue;
15589 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
15590 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
15591 + (pTb->aRanges[idxRange].idxPhysPage == 0
15592 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15593 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
15594 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15595 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
15596 pTb->aRanges[idxRange].idxPhysPage);
15597 GCPhysPc += offRange;
15598 }
15599
15600 /* Disassemble the instruction. */
15601 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
15602 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
15603 uint32_t cbInstr = 1;
15604 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15605 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
15606 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15607 if (RT_SUCCESS(rc))
15608 {
15609 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15610 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15611 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15612 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15613
15614 static unsigned const s_offMarker = 55;
15615 static char const s_szMarker[] = " ; <--- guest";
15616 if (cch < s_offMarker)
15617 {
15618 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
15619 cch = s_offMarker;
15620 }
15621 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
15622 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
15623
15624 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
15625 }
15626 else
15627 {
15628 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
15629 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
15630 cbInstr = 1;
15631 }
15632 GCPhysPc += cbInstr;
15633 offOpcodes += cbInstr;
15634 offRange += cbInstr;
15635 continue;
15636 }
15637
15638 case kIemTbDbgEntryType_ThreadedCall:
15639 pHlp->pfnPrintf(pHlp,
15640 " Call #%u to %s (%u args) - %s\n",
15641 idxThreadedCall,
15642 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15643 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15644 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
15645 idxThreadedCall++;
15646 continue;
15647
15648 case kIemTbDbgEntryType_GuestRegShadowing:
15649 {
15650 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15651 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
15652 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
15653 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
15654 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15655 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
15656 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
15657 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
15658 else
15659 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
15660 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
15661 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15662 continue;
15663 }
15664
15665#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15666 case kIemTbDbgEntryType_GuestSimdRegShadowing:
15667 {
15668 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15669 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
15670 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
15671 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
15672 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15673 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
15674 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
15675 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
15676 else
15677 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
15678 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
15679 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15680 continue;
15681 }
15682#endif
15683
15684 case kIemTbDbgEntryType_Label:
15685 {
15686 const char *pszName = "what_the_fudge";
15687 const char *pszComment = "";
15688 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
15689 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
15690 {
15691 case kIemNativeLabelType_Return:
15692 pszName = "Return";
15693 break;
15694 case kIemNativeLabelType_ReturnBreak:
15695 pszName = "ReturnBreak";
15696 break;
15697 case kIemNativeLabelType_ReturnWithFlags:
15698 pszName = "ReturnWithFlags";
15699 break;
15700 case kIemNativeLabelType_NonZeroRetOrPassUp:
15701 pszName = "NonZeroRetOrPassUp";
15702 break;
15703 case kIemNativeLabelType_RaiseGp0:
15704 pszName = "RaiseGp0";
15705 break;
15706 case kIemNativeLabelType_RaiseNm:
15707 pszName = "RaiseNm";
15708 break;
15709 case kIemNativeLabelType_RaiseUd:
15710 pszName = "RaiseUd";
15711 break;
15712 case kIemNativeLabelType_RaiseMf:
15713 pszName = "RaiseMf";
15714 break;
15715 case kIemNativeLabelType_RaiseXf:
15716 pszName = "RaiseXf";
15717 break;
15718 case kIemNativeLabelType_ObsoleteTb:
15719 pszName = "ObsoleteTb";
15720 break;
15721 case kIemNativeLabelType_NeedCsLimChecking:
15722 pszName = "NeedCsLimChecking";
15723 break;
15724 case kIemNativeLabelType_CheckBranchMiss:
15725 pszName = "CheckBranchMiss";
15726 break;
15727 case kIemNativeLabelType_If:
15728 pszName = "If";
15729 fNumbered = true;
15730 break;
15731 case kIemNativeLabelType_Else:
15732 pszName = "Else";
15733 fNumbered = true;
15734 pszComment = " ; regs state restored pre-if-block";
15735 break;
15736 case kIemNativeLabelType_Endif:
15737 pszName = "Endif";
15738 fNumbered = true;
15739 break;
15740 case kIemNativeLabelType_CheckIrq:
15741 pszName = "CheckIrq_CheckVM";
15742 fNumbered = true;
15743 break;
15744 case kIemNativeLabelType_TlbLookup:
15745 pszName = "TlbLookup";
15746 fNumbered = true;
15747 break;
15748 case kIemNativeLabelType_TlbMiss:
15749 pszName = "TlbMiss";
15750 fNumbered = true;
15751 break;
15752 case kIemNativeLabelType_TlbDone:
15753 pszName = "TlbDone";
15754 fNumbered = true;
15755 break;
15756 case kIemNativeLabelType_Invalid:
15757 case kIemNativeLabelType_End:
15758 break;
15759 }
15760 if (fNumbered)
15761 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
15762 else
15763 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
15764 continue;
15765 }
15766
15767 case kIemTbDbgEntryType_NativeOffset:
15768 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
15769 Assert(offDbgNativeNext > offNative);
15770 break;
15771
15772#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
15773 case kIemTbDbgEntryType_DelayedPcUpdate:
15774 pHlp->pfnPrintf(pHlp,
15775 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
15776 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
15777 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
15778 continue;
15779#endif
15780
15781 default:
15782 AssertFailed();
15783 }
15784 iDbgEntry++;
15785 break;
15786 }
15787 }
15788
15789 /*
15790 * Disassemble the next native instruction.
15791 */
15792 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15793# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15794 uint32_t cbInstr = sizeof(paNative[0]);
15795 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
15796 if (RT_SUCCESS(rc))
15797 {
15798# if defined(RT_ARCH_AMD64)
15799 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
15800 {
15801 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
15802 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
15803 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
15804 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
15805 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
15806 uInfo & 0x8000 ? "recompiled" : "todo");
15807 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
15808 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
15809 else
15810 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
15811 }
15812 else
15813# endif
15814 {
15815 const char *pszAnnotation = NULL;
15816# ifdef RT_ARCH_AMD64
15817 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15818 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15819 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15820 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
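                    /* Annotate memory operands that use the fixed pVCpu register as base with the name
                       of the VMCPU/CPUMCTX field at that displacement. */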
15821 PCDISOPPARAM pMemOp;
15822 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
15823 pMemOp = &Dis.Param1;
15824 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
15825 pMemOp = &Dis.Param2;
15826 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
15827 pMemOp = &Dis.Param3;
15828 else
15829 pMemOp = NULL;
15830 if ( pMemOp
15831 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
15832 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
15833 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
15834 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
15835
15836#elif defined(RT_ARCH_ARM64)
15837 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
15838 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15839 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15840# else
15841# error "Port me"
15842# endif
15843 if (pszAnnotation)
15844 {
15845 static unsigned const s_offAnnotation = 55;
15846 size_t const cchAnnotation = strlen(pszAnnotation);
15847 size_t cchDis = strlen(szDisBuf);
15848 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
15849 {
15850 if (cchDis < s_offAnnotation)
15851 {
15852 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
15853 cchDis = s_offAnnotation;
15854 }
15855 szDisBuf[cchDis++] = ' ';
15856 szDisBuf[cchDis++] = ';';
15857 szDisBuf[cchDis++] = ' ';
15858 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
15859 }
15860 }
15861 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
15862 }
15863 }
15864 else
15865 {
15866# if defined(RT_ARCH_AMD64)
15867 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
15868 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
15869# elif defined(RT_ARCH_ARM64)
15870 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
15871# else
15872# error "Port me"
15873# endif
15874 cbInstr = sizeof(paNative[0]);
15875 }
15876 offNative += cbInstr / sizeof(paNative[0]);
15877
15878# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15879 cs_insn *pInstr;
15880 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
15881 (uintptr_t)pNativeCur, 1, &pInstr);
15882 if (cInstrs > 0)
15883 {
15884 Assert(cInstrs == 1);
15885 const char *pszAnnotation = NULL;
15886# if defined(RT_ARCH_ARM64)
15887 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
15888 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
15889 {
15890                 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
15891 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
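                /* Hypothetical example: an op_str like "w9, [x28, #64]" resolves to VMCPU offset 64,
                   while "[x27, #8]" resolves to offset 8 into cpum.GstCtx. */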
15892 char *psz = strchr(pInstr->op_str, '[');
15893 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
15894 {
15895                     uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
15896 int32_t off = -1;
15897 psz += 4;
15898 if (*psz == ']')
15899 off = 0;
15900 else if (*psz == ',')
15901 {
15902 psz = RTStrStripL(psz + 1);
15903 if (*psz == '#')
15904 off = RTStrToInt32(&psz[1]);
15905 /** @todo deal with index registers and LSL as well... */
15906 }
15907 if (off >= 0)
15908 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
15909 }
15910 }
15911# endif
15912
15913 size_t const cchOp = strlen(pInstr->op_str);
15914# if defined(RT_ARCH_AMD64)
15915 if (pszAnnotation)
15916 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
15917 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
15918 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15919 else
15920 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
15921 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
15922
15923# else
15924 if (pszAnnotation)
15925 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
15926 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
15927 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15928 else
15929 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
15930 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
15931# endif
15932 offNative += pInstr->size / sizeof(*pNativeCur);
15933 cs_free(pInstr, cInstrs);
15934 }
15935 else
15936 {
15937# if defined(RT_ARCH_AMD64)
15938 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
15939                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
15940# else
15941 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
15942# endif
15943 offNative++;
15944 }
15945# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15946 }
15947 }
15948 else
15949#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
15950 {
15951 /*
15952 * No debug info, just disassemble the x86 code and then the native code.
15953 *
15954 * First the guest code:
15955 */
15956 for (unsigned i = 0; i < pTb->cRanges; i++)
15957 {
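            /* Resolve the range's guest-physical PC: page index 0 refers to the TB's own first page,
               higher indexes refer to aGCPhysPages[]. */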
15958 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
15959 + (pTb->aRanges[i].idxPhysPage == 0
15960 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15961 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
15962 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15963 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
15964 unsigned off = pTb->aRanges[i].offOpcodes;
15965 /** @todo this ain't working when crossing pages! */
15966 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
15967 while (off < cbOpcodes)
15968 {
15969 uint32_t cbInstr = 1;
15970 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15971 &pTb->pabOpcodes[off], cbOpcodes - off,
15972 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15973 if (RT_SUCCESS(rc))
15974 {
15975 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15976 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15977 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15978 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15979 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
15980 GCPhysPc += cbInstr;
15981 off += cbInstr;
15982 }
15983 else
15984 {
15985 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
15986 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
15987 break;
15988 }
15989 }
15990 }
15991
15992 /*
15993 * Then the native code:
15994 */
15995 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
15996 while (offNative < cNative)
15997 {
15998 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15999# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
16000 uint32_t cbInstr = sizeof(paNative[0]);
16001 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
16002 if (RT_SUCCESS(rc))
16003 {
16004# if defined(RT_ARCH_AMD64)
16005 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
16006 {
16007 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
16008 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
16009 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
16010 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
16011 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
16012 uInfo & 0x8000 ? "recompiled" : "todo");
16013 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
16014 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
16015 else
16016 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
16017 }
16018 else
16019# endif
16020 {
16021# ifdef RT_ARCH_AMD64
16022 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
16023 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
16024 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16025 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16026# elif defined(RT_ARCH_ARM64)
16027 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
16028 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16029 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16030# else
16031# error "Port me"
16032# endif
16033 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
16034 }
16035 }
16036 else
16037 {
16038# if defined(RT_ARCH_AMD64)
16039 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
16040 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
16041# else
16042 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
16043# endif
16044 cbInstr = sizeof(paNative[0]);
16045 }
16046 offNative += cbInstr / sizeof(paNative[0]);
16047
16048# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16049 cs_insn *pInstr;
16050 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
16051 (uintptr_t)pNativeCur, 1, &pInstr);
16052 if (cInstrs > 0)
16053 {
16054 Assert(cInstrs == 1);
16055# if defined(RT_ARCH_AMD64)
16056 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
16057 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
16058# else
16059 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
16060 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
16061# endif
16062 offNative += pInstr->size / sizeof(*pNativeCur);
16063 cs_free(pInstr, cInstrs);
16064 }
16065 else
16066 {
16067# if defined(RT_ARCH_AMD64)
16068 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
16069                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
16070# else
16071 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
16072# endif
16073 offNative++;
16074 }
16075# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16076 }
16077 }
16078
16079#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
16080 /* Cleanup. */
16081 cs_close(&hDisasm);
16082#endif
16083}
16084
16085
16086/**
16087 * Recompiles the given threaded TB into a native one.
16088 *
16089 * In case of failure the translation block will be returned as-is.
16090 *
16091 * @returns pTb.
16092 * @param pVCpu The cross context virtual CPU structure of the calling
16093 * thread.
16094 * @param   pTb     The threaded translation block to recompile to native.
16095 */
16096DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
16097{
16098 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
16099
16100 /*
16101      * The first time thru, we allocate the recompiler state; on subsequent calls
16102      * we just need to reset it before using it again.
16103 */
16104 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
16105 if (RT_LIKELY(pReNative))
16106 iemNativeReInit(pReNative, pTb);
16107 else
16108 {
16109 pReNative = iemNativeInit(pVCpu, pTb);
16110 AssertReturn(pReNative, pTb);
16111 }
16112
16113#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16114 /*
16115 * First do liveness analysis. This is done backwards.
16116 */
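    /* Note: backwards, because whether a guest register is an input to call N depends on
       how calls N+1 and later use it. */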
16117 {
16118 uint32_t idxCall = pTb->Thrd.cCalls;
16119 if (idxCall <= pReNative->cLivenessEntriesAlloc)
16120 { /* likely */ }
16121 else
16122 {
16123 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
16124 while (idxCall > cAlloc)
16125 cAlloc *= 2;
16126 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
16127 AssertReturn(pvNew, pTb);
16128 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
16129 pReNative->cLivenessEntriesAlloc = cAlloc;
16130 }
16131 AssertReturn(idxCall > 0, pTb);
16132 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
16133
16134 /* The initial (final) entry. */
16135 idxCall--;
16136 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
16137
16138 /* Loop backwards thru the calls and fill in the other entries. */
16139 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
16140 while (idxCall > 0)
16141 {
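            /* Use the dedicated liveness function when one exists; otherwise fall back to the
               conservative exception/call initialization. */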
16142 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
16143 if (pfnLiveness)
16144 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
16145 else
16146 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
16147 pCallEntry--;
16148 idxCall--;
16149 }
16150
16151# ifdef VBOX_WITH_STATISTICS
16152     /* Check if there are any EFLAGS optimizations to be had here.  This requires someone setting them
16153        to 'clobbered' rather than 'input'. */
16154 /** @todo */
16155# endif
16156 }
16157#endif
16158
16159 /*
16160 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
16161 * for aborting if an error happens.
16162 */
16163 uint32_t cCallsLeft = pTb->Thrd.cCalls;
16164#ifdef LOG_ENABLED
16165 uint32_t const cCallsOrg = cCallsLeft;
16166#endif
16167 uint32_t off = 0;
16168 int rc = VINF_SUCCESS;
16169 IEMNATIVE_TRY_SETJMP(pReNative, rc)
16170 {
16171 /*
16172 * Emit prolog code (fixed).
16173 */
16174 off = iemNativeEmitProlog(pReNative, off);
16175
16176 /*
16177 * Convert the calls to native code.
16178 */
16179#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16180 int32_t iGstInstr = -1;
16181#endif
16182#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
16183 uint32_t cThreadedCalls = 0;
16184 uint32_t cRecompiledCalls = 0;
16185#endif
16186#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16187 uint32_t idxCurCall = 0;
16188#endif
16189 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
16190 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
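        /* Each threaded call below is either recompiled to native code (pfnRecom) or emitted as a
           call to its threaded helper function. */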
16191 while (cCallsLeft-- > 0)
16192 {
16193 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
16194#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16195 pReNative->idxCurCall = idxCurCall;
16196#endif
16197
16198 /*
16199 * Debug info, assembly markup and statistics.
16200 */
16201#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
16202 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
16203 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
16204#endif
16205#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16206 iemNativeDbgInfoAddNativeOffset(pReNative, off);
16207 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
16208 {
16209 if (iGstInstr < (int32_t)pTb->cInstructions)
16210 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
16211 else
16212 Assert(iGstInstr == pTb->cInstructions);
16213 iGstInstr = pCallEntry->idxInstr;
16214 }
16215 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
16216#endif
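            /* In strict builds, emit a marker that the disassembler above decodes: low word = call
               index (bit 15 set when recompiled), high word = threaded function number. */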
16217#if defined(VBOX_STRICT)
16218 off = iemNativeEmitMarker(pReNative, off,
16219 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
16220#endif
16221#if defined(VBOX_STRICT)
16222 iemNativeRegAssertSanity(pReNative);
16223#endif
16224#ifdef VBOX_WITH_STATISTICS
16225 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
16226#endif
16227
16228 /*
16229 * Actual work.
16230 */
16231 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
16232 pfnRecom ? "(recompiled)" : "(todo)"));
16233 if (pfnRecom) /** @todo stats on this. */
16234 {
16235 off = pfnRecom(pReNative, off, pCallEntry);
16236 STAM_REL_STATS({cRecompiledCalls++;});
16237 }
16238 else
16239 {
16240 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
16241 STAM_REL_STATS({cThreadedCalls++;});
16242 }
16243 Assert(off <= pReNative->cInstrBufAlloc);
16244 Assert(pReNative->cCondDepth == 0);
16245
16246#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
16247 if (LogIs2Enabled())
16248 {
16249 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
16250# ifndef IEMLIVENESS_EXTENDED_LAYOUT
16251 static const char s_achState[] = "CUXI";
16252# else
16253 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
16254# endif
16255
16256 char szGpr[17];
16257 for (unsigned i = 0; i < 16; i++)
16258 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
16259 szGpr[16] = '\0';
16260
16261 char szSegBase[X86_SREG_COUNT + 1];
16262 char szSegLimit[X86_SREG_COUNT + 1];
16263 char szSegAttrib[X86_SREG_COUNT + 1];
16264 char szSegSel[X86_SREG_COUNT + 1];
16265 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
16266 {
16267 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
16268 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
16269 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
16270 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
16271 }
16272 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
16273 = szSegSel[X86_SREG_COUNT] = '\0';
16274
16275 char szEFlags[8];
16276 for (unsigned i = 0; i < 7; i++)
16277 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
16278 szEFlags[7] = '\0';
16279
16280                 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
16281 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
16282 }
16283#endif
16284
16285 /*
16286 * Advance.
16287 */
16288 pCallEntry++;
16289#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16290 idxCurCall++;
16291#endif
16292 }
16293
16294 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
16295 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
16296 if (!cThreadedCalls)
16297 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
16298
16299 /*
16300 * Emit the epilog code.
16301 */
16302 uint32_t idxReturnLabel;
16303 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
16304
16305 /*
16306 * Generate special jump labels.
16307 */
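        /* Only emit tail code for label types the generated code actually references (tracked in bmLabelTypes). */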
16308 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
16309 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
16310 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
16311 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
16312 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
16313 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
16314 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
16315 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
16316 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
16317 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
16318 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
16319 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
16320 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
16321 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
16322 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
16323 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
16324 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
16325 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
16326 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
16327 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
16328 }
16329 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
16330 {
16331 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
16332 return pTb;
16333 }
16334 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
16335 Assert(off <= pReNative->cInstrBufAlloc);
16336
16337 /*
16338      * Make sure all labels have been defined.
16339 */
16340 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
16341#ifdef VBOX_STRICT
16342 uint32_t const cLabels = pReNative->cLabels;
16343 for (uint32_t i = 0; i < cLabels; i++)
16344 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
16345#endif
16346
16347 /*
16348 * Allocate executable memory, copy over the code we've generated.
16349 */
16350 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
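    /* Process any delayed TB frees first, presumably so executable memory they hold can be
       reused by the allocation below. */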
16351 if (pTbAllocator->pDelayedFreeHead)
16352 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
16353
16354 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
16355 AssertReturn(paFinalInstrBuf, pTb);
16356 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
16357
16358 /*
16359 * Apply fixups.
16360 */
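    /* Each fixup patches a label-relative displacement into the copied code:
       displacement = label offset - fixup offset + addend, in IEMNATIVEINSTR units. */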
16361 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
16362 uint32_t const cFixups = pReNative->cFixups;
16363 for (uint32_t i = 0; i < cFixups; i++)
16364 {
16365 Assert(paFixups[i].off < off);
16366 Assert(paFixups[i].idxLabel < cLabels);
16367 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
16368 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
16369 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
16370 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
16371 switch (paFixups[i].enmType)
16372 {
16373#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
16374 case kIemNativeFixupType_Rel32:
16375 Assert(paFixups[i].off + 4 <= off);
16376 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16377 continue;
16378
16379#elif defined(RT_ARCH_ARM64)
16380 case kIemNativeFixupType_RelImm26At0:
16381 {
16382 Assert(paFixups[i].off < off);
16383 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16384 Assert(offDisp >= -262144 && offDisp < 262144);
16385 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
16386 continue;
16387 }
16388
16389 case kIemNativeFixupType_RelImm19At5:
16390 {
16391 Assert(paFixups[i].off < off);
16392 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16393 Assert(offDisp >= -262144 && offDisp < 262144);
16394 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
16395 continue;
16396 }
16397
16398 case kIemNativeFixupType_RelImm14At5:
16399 {
16400 Assert(paFixups[i].off < off);
16401 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16402 Assert(offDisp >= -8192 && offDisp < 8192);
16403 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
16404 continue;
16405 }
16406
16407#endif
16408 case kIemNativeFixupType_Invalid:
16409 case kIemNativeFixupType_End:
16410 break;
16411 }
16412 AssertFailed();
16413 }
16414
16415 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
16416 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
16417
16418 /*
16419 * Convert the translation block.
16420 */
16421 RTMemFree(pTb->Thrd.paCalls);
16422 pTb->Native.paInstructions = paFinalInstrBuf;
16423 pTb->Native.cInstructions = off;
16424 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
16425#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16426     pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
16427 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
16428#endif
16429
16430 Assert(pTbAllocator->cThreadedTbs > 0);
16431 pTbAllocator->cThreadedTbs -= 1;
16432 pTbAllocator->cNativeTbs += 1;
16433 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
16434
16435#ifdef LOG_ENABLED
16436 /*
16437 * Disassemble to the log if enabled.
16438 */
16439 if (LogIs3Enabled())
16440 {
16441 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
16442 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
16443# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
16444 RTLogFlush(NULL);
16445# endif
16446 }
16447#endif
16448 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
16449
16450 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
16451 return pTb;
16452}
16453