VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@103776

Last change on this file since 103776 was 103775, checked in by vboxsync, 13 months ago

VMM/IEM: Implement the writeback of dirty host registers shadowing guest registers in iemNativeSimdRegAllocFindFree() so it can actually work (triggered by vzeroupper), bugref:10614

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 736.1 KB
1/* $Id: IEMAllN8veRecompiler.cpp 103775 2024-03-11 16:34:25Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : ...
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include "IEMInternal.h"
55#include <VBox/vmm/vmcc.h>
56#include <VBox/log.h>
57#include <VBox/err.h>
58#include <VBox/dis.h>
59#include <VBox/param.h>
60#include <iprt/assert.h>
61#include <iprt/heap.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef RT_OS_WINDOWS
71# include <iprt/formats/pecoff.h> /* this is incompatible with windows.h, thus: */
72extern "C" DECLIMPORT(uint8_t) __cdecl RtlAddFunctionTable(void *pvFunctionTable, uint32_t cEntries, uintptr_t uBaseAddress);
73extern "C" DECLIMPORT(uint8_t) __cdecl RtlDelFunctionTable(void *pvFunctionTable);
74#else
75# include <iprt/formats/dwarf.h>
76# if defined(RT_OS_DARWIN)
77# include <libkern/OSCacheControl.h>
78# define IEMNATIVE_USE_LIBUNWIND
79extern "C" void __register_frame(const void *pvFde);
80extern "C" void __deregister_frame(const void *pvFde);
81# else
82# ifdef DEBUG_bird /** @todo not thread safe yet */
83# define IEMNATIVE_USE_GDB_JIT
84# endif
85# ifdef IEMNATIVE_USE_GDB_JIT
86# include <iprt/critsect.h>
87# include <iprt/once.h>
88# include <iprt/formats/elf64.h>
89# endif
90extern "C" void __register_frame_info(void *pvBegin, void *pvObj); /* found no header for these two */
91extern "C" void *__deregister_frame_info(void *pvBegin); /* (returns pvObj from __register_frame_info call) */
92# endif
93#endif
94#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
95# include "/opt/local/include/capstone/capstone.h"
96#endif
97
98#include "IEMInline.h"
99#include "IEMThreadedFunctions.h"
100#include "IEMN8veRecompiler.h"
101#include "IEMN8veRecompilerEmit.h"
102#include "IEMN8veRecompilerTlbLookup.h"
103#include "IEMNativeFunctions.h"
104
105
106/*
107 * Narrow down configs here to avoid wasting time on unused configs.
108 * Note! Same checks in IEMAllThrdRecompiler.cpp.
109 */
110
111#ifndef IEM_WITH_CODE_TLB
112# error The code TLB must be enabled for the recompiler.
113#endif
114
115#ifndef IEM_WITH_DATA_TLB
116# error The data TLB must be enabled for the recompiler.
117#endif
118
119#ifndef IEM_WITH_SETJMP
120# error The setjmp approach must be enabled for the recompiler.
121#endif
122
123/** @todo eliminate this clang build hack. */
124#if RT_CLANG_PREREQ(4, 0)
125# pragma GCC diagnostic ignored "-Wunused-function"
126#endif
127
128
129/*********************************************************************************************************************************
130* Internal Functions *
131*********************************************************************************************************************************/
132#ifdef VBOX_STRICT
133static uint32_t iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off,
134 uint8_t idxReg, IEMNATIVEGSTREG enmGstReg);
135# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
136static uint32_t iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
137 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz);
138# endif
139static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative);
140#endif
141#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
142static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off);
143static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
144#endif
145DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
146DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
147 IEMNATIVEGSTREG enmGstReg, uint32_t off);
148DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
149
150
151/*********************************************************************************************************************************
152* Executable Memory Allocator *
153*********************************************************************************************************************************/
154/** @def IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
155 * Use an alternative chunk sub-allocator that does not store internal data
156 * in the chunk.
157 *
158 * Using RTHeapSimple is not practical on newer darwin systems where
159 * RTMEM_PROT_WRITE and RTMEM_PROT_EXEC are mutually exclusive in process
160 * memory. We would have to change the protection of the whole chunk for
161 * every call to RTHeapSimple, which would be rather expensive.
162 *
163 * This alternative implementation lets us restrict page protection modifications
164 * to the pages backing the executable memory we just allocated.
165 */
166#define IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
167/** The chunk sub-allocation unit size in bytes. */
168#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE 128
169/** The chunk sub-allocation unit size as a shift factor. */
170#define IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT 7
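/* For illustration: with the above, a request of e.g. 200 bytes is rounded up
 * to cReqUnits = (200 + 128 - 1) >> 7 = 2 units (256 bytes) by
 * iemExecMemAllocatorAllocInChunk, and a unit index is converted back to a
 * byte offset within the chunk by shifting it left by 7. */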
171
172#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
173# ifdef IEMNATIVE_USE_GDB_JIT
174# define IEMNATIVE_USE_GDB_JIT_ET_DYN
175
176/** GDB JIT: Code entry. */
177typedef struct GDBJITCODEENTRY
178{
179 struct GDBJITCODEENTRY *pNext;
180 struct GDBJITCODEENTRY *pPrev;
181 uint8_t *pbSymFile;
182 uint64_t cbSymFile;
183} GDBJITCODEENTRY;
184
185/** GDB JIT: Actions. */
186typedef enum GDBJITACTIONS : uint32_t
187{
188 kGdbJitaction_NoAction = 0, kGdbJitaction_Register, kGdbJitaction_Unregister
189} GDBJITACTIONS;
190
191/** GDB JIT: Descriptor. */
192typedef struct GDBJITDESCRIPTOR
193{
194 uint32_t uVersion;
195 GDBJITACTIONS enmAction;
196 GDBJITCODEENTRY *pRelevant;
197 GDBJITCODEENTRY *pHead;
198 /** Our addition: */
199 GDBJITCODEENTRY *pTail;
200} GDBJITDESCRIPTOR;
201
202/** GDB JIT: Our simple symbol file data. */
203typedef struct GDBJITSYMFILE
204{
205 Elf64_Ehdr EHdr;
206# ifndef IEMNATIVE_USE_GDB_JIT_ET_DYN
207 Elf64_Shdr aShdrs[5];
208# else
209 Elf64_Shdr aShdrs[7];
210 Elf64_Phdr aPhdrs[2];
211# endif
212 /** The dwarf ehframe data for the chunk. */
213 uint8_t abEhFrame[512];
214 char szzStrTab[128];
215 Elf64_Sym aSymbols[3];
216# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
217 Elf64_Sym aDynSyms[2];
218 Elf64_Dyn aDyn[6];
219# endif
220} GDBJITSYMFILE;
221
222extern "C" GDBJITDESCRIPTOR __jit_debug_descriptor;
223extern "C" DECLEXPORT(void) __jit_debug_register_code(void);
224
225/** Init once for g_IemNativeGdbJitLock. */
226static RTONCE g_IemNativeGdbJitOnce = RTONCE_INITIALIZER;
227/** Init once for the critical section. */
228static RTCRITSECT g_IemNativeGdbJitLock;
229
230/** GDB reads the info here. */
231GDBJITDESCRIPTOR __jit_debug_descriptor = { 1, kGdbJitaction_NoAction, NULL, NULL };
232
233/** GDB sets a breakpoint on this and checks __jit_debug_descriptor when hit. */
234DECL_NO_INLINE(RT_NOTHING, DECLEXPORT(void)) __jit_debug_register_code(void)
235{
236 ASMNopPause();
237}
238
239/** @callback_method_impl{FNRTONCE} */
240static DECLCALLBACK(int32_t) iemNativeGdbJitInitOnce(void *pvUser)
241{
242 RT_NOREF(pvUser);
243 return RTCritSectInit(&g_IemNativeGdbJitLock);
244}
245
246
247# endif /* IEMNATIVE_USE_GDB_JIT */
248
249/**
250 * Per-chunk unwind info for non-windows hosts.
251 */
252typedef struct IEMEXECMEMCHUNKEHFRAME
253{
254# ifdef IEMNATIVE_USE_LIBUNWIND
255 /** The offset of the FDE into abEhFrame. */
256 uintptr_t offFda;
257# else
258 /** 'struct object' storage area. */
259 uint8_t abObject[1024];
260# endif
261# ifdef IEMNATIVE_USE_GDB_JIT
262# if 0
263 /** The GDB JIT 'symbol file' data. */
264 GDBJITSYMFILE GdbJitSymFile;
265# endif
266 /** The GDB JIT list entry. */
267 GDBJITCODEENTRY GdbJitEntry;
268# endif
269 /** The dwarf ehframe data for the chunk. */
270 uint8_t abEhFrame[512];
271} IEMEXECMEMCHUNKEHFRAME;
272/** Pointer to per-chunk unwind info for non-windows hosts. */
273typedef IEMEXECMEMCHUNKEHFRAME *PIEMEXECMEMCHUNKEHFRAME;
274#endif
275
276
277/**
278 * A chunk of executable memory.
279 */
280typedef struct IEMEXECMEMCHUNK
281{
282#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
283 /** Number of free items in this chunk. */
284 uint32_t cFreeUnits;
285 /** Hint where to start searching for free space in the allocation bitmap. */
286 uint32_t idxFreeHint;
287#else
288 /** The heap handle. */
289 RTHEAPSIMPLE hHeap;
290#endif
291 /** Pointer to the chunk. */
292 void *pvChunk;
293#ifdef IN_RING3
294 /**
295 * Pointer to the unwind information.
296 *
297 * This is used during C++ throw and longjmp (windows and probably most other
298 * platforms). Some debuggers (windbg) make use of it as well.
299 *
300 * Windows: This is allocated from hHeap on windows because (at least for
301 * AMD64) the UNWIND_INFO structure address in the
302 * RUNTIME_FUNCTION entry is an RVA and the chunk is the "image".
303 *
304 * Others: Allocated from the regular heap to avoid unnecessary executable data
305 * structures. This points to an IEMEXECMEMCHUNKEHFRAME structure. */
306 void *pvUnwindInfo;
307#elif defined(IN_RING0)
308 /** Allocation handle. */
309 RTR0MEMOBJ hMemObj;
310#endif
311} IEMEXECMEMCHUNK;
312/** Pointer to a memory chunk. */
313typedef IEMEXECMEMCHUNK *PIEMEXECMEMCHUNK;
314
315
316/**
317 * Executable memory allocator for the native recompiler.
318 */
319typedef struct IEMEXECMEMALLOCATOR
320{
321 /** Magic value (IEMEXECMEMALLOCATOR_MAGIC). */
322 uint32_t uMagic;
323
324 /** The chunk size. */
325 uint32_t cbChunk;
326 /** The maximum number of chunks. */
327 uint32_t cMaxChunks;
328 /** The current number of chunks. */
329 uint32_t cChunks;
330 /** Hint where to start looking for available memory. */
331 uint32_t idxChunkHint;
332 /** Statistics: Current number of allocations. */
333 uint32_t cAllocations;
334
335 /** The total amount of memory available. */
336 uint64_t cbTotal;
337 /** Total amount of free memory. */
338 uint64_t cbFree;
339 /** Total amount of memory allocated. */
340 uint64_t cbAllocated;
341
342#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
343 /** Pointer to the allocation bitmaps for all the chunks (follows aChunks).
344 *
345 * Since the chunk size is a power of two and the minimum chunk size is a lot
346 * higher than the IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE, each chunk will always
347 * require a whole number of uint64_t elements in the allocation bitmap. So,
348 * for the sake of simplicity/laziness, they are allocated as one
349 * continuous block. */
350 uint64_t *pbmAlloc;
351 /** Number of units (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE) per chunk. */
352 uint32_t cUnitsPerChunk;
353 /** Number of bitmap elements per chunk (for quickly locating the bitmap
354 * portion corresponding to a chunk). */
355 uint32_t cBitmapElementsPerChunk;
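 /* For illustration, with a 64 MiB chunk (the size iemExecMemAllocatorInit
  * picks when cbMax >= 256 MiB) this works out to 64M >> 7 = 524288 units per
  * chunk and 64M >> 13 = 8192 uint64_t bitmap elements, i.e. 64 KiB of
  * allocation bitmap per chunk. */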
356#else
357 /** @name Tweaks to get 64 byte aligned allocations w/o unnecessary fragmentation.
358 * @{ */
359 /** The size of the heap internal block header. This is used to adjust the
360 * requested memory size to make sure there is exactly enough room for a header at
361 * the end of the blocks we allocate before the next 64 byte alignment line. */
362 uint32_t cbHeapBlockHdr;
363 /** The size of the initial heap allocation required to make sure the first
364 * allocation is correctly aligned. */
365 uint32_t cbHeapAlignTweak;
366 /** The alignment tweak allocation address. */
367 void *pvAlignTweak;
368 /** @} */
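 /* For illustration (this RTHeapSimple path only): with the 32 byte block
  * header used on 64-bit hosts, a 100 byte request is adjusted to
  * RT_ALIGN_32(100 + 32, 64) - 32 = 160 bytes, so the user area (160) plus
  * the next block header (32) spans exactly 192 bytes and the following
  * user area starts on a 64 byte boundary again. */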
369#endif
370
371#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
372 /** Pointer to the array of unwind info running parallel to aChunks (same
373 * allocation as this structure, located after the bitmaps).
374 * (For Windows, the structures must reside in 32-bit RVA distance to the
375 * actual chunk, so they are allocated off the chunk.) */
376 PIEMEXECMEMCHUNKEHFRAME paEhFrames;
377#endif
378
379 /** The allocation chunks. */
380 RT_FLEXIBLE_ARRAY_EXTENSION
381 IEMEXECMEMCHUNK aChunks[RT_FLEXIBLE_ARRAY];
382} IEMEXECMEMALLOCATOR;
383/** Pointer to an executable memory allocator. */
384typedef IEMEXECMEMALLOCATOR *PIEMEXECMEMALLOCATOR;
385
386/** Magic value for IEMEXECMEMALLOCATOR::uMagic (Scott Frederick Turow). */
387#define IEMEXECMEMALLOCATOR_MAGIC UINT32_C(0x19490412)
388
389
390static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator);
391
392
393/**
394 * Worker for iemExecMemAllocatorAlloc that returns @a pvRet after updating
395 * the heap statistics.
396 */
397static void * iemExecMemAllocatorAllocTailCode(PIEMEXECMEMALLOCATOR pExecMemAllocator, void *pvRet,
398 uint32_t cbReq, uint32_t idxChunk)
399{
400 pExecMemAllocator->cAllocations += 1;
401 pExecMemAllocator->cbAllocated += cbReq;
402#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
403 pExecMemAllocator->cbFree -= cbReq;
404#else
405 pExecMemAllocator->cbFree -= RT_ALIGN_32(cbReq, 64);
406#endif
407 pExecMemAllocator->idxChunkHint = idxChunk;
408
409#ifdef RT_OS_DARWIN
410 /*
411 * Sucks, but RTMEM_PROT_EXEC and RTMEM_PROT_WRITE are mutually exclusive
412 * on darwin. So, we mark the pages returned as read+write after alloc and
413 * expect the caller to call iemExecMemAllocatorReadyForUse when done
414 * writing to the allocation.
415 *
416 * See also https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
417 * for details.
418 */
419 /** @todo detect if this is necessary... it wasn't required on 10.15 or
420 * whatever older version it was. */
421 int rc = RTMemProtect(pvRet, cbReq, RTMEM_PROT_WRITE | RTMEM_PROT_READ);
422 AssertRC(rc);
423#endif
424
425 return pvRet;
426}
427
428
429#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
430static void *iemExecMemAllocatorAllocInChunkInt(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint64_t *pbmAlloc, uint32_t idxFirst,
431 uint32_t cToScan, uint32_t cReqUnits, uint32_t idxChunk)
432{
433 /*
434 * Shift the bitmap to the idxFirst bit so we can use ASMBitFirstClear.
435 */
436 Assert(!(cToScan & 63));
437 Assert(!(idxFirst & 63));
438 Assert(cToScan + idxFirst <= pExecMemAllocator->cUnitsPerChunk);
439 pbmAlloc += idxFirst / 64;
440
441 /*
442 * Scan the bitmap for cReqUnits of consecutive clear bits
443 */
444 /** @todo This can probably be done more efficiently for non-x86 systems. */
445 int iBit = ASMBitFirstClear(pbmAlloc, cToScan);
446 while (iBit >= 0 && (uint32_t)iBit <= cToScan - cReqUnits)
447 {
448 uint32_t idxAddBit = 1;
449 while (idxAddBit < cReqUnits && !ASMBitTest(pbmAlloc, (uint32_t)iBit + idxAddBit))
450 idxAddBit++;
451 if (idxAddBit >= cReqUnits)
452 {
453 ASMBitSetRange(pbmAlloc, (uint32_t)iBit, (uint32_t)iBit + cReqUnits);
454
455 PIEMEXECMEMCHUNK const pChunk = &pExecMemAllocator->aChunks[idxChunk];
456 pChunk->cFreeUnits -= cReqUnits;
457 pChunk->idxFreeHint = (uint32_t)iBit + cReqUnits;
458
459 void * const pvRet = (uint8_t *)pChunk->pvChunk
460 + ((idxFirst + (uint32_t)iBit) << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT);
461
462 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet,
463 cReqUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT, idxChunk);
464 }
465
466 iBit = ASMBitNextClear(pbmAlloc, cToScan, iBit + idxAddBit - 1);
467 }
468 return NULL;
469}
470#endif /* IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
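/* A worked example of the scan above, assuming idxFirst = 0 and cReqUnits = 3:
 * with the low bitmap bits reading 1 1 0 0 1 0 0 0 (bit 0 = first unit),
 * ASMBitFirstClear finds bit 2, the inner loop stops at set bit 4 with
 * idxAddBit = 2 (too short), the search resumes at the next clear bit (5),
 * and units 5..7 are then marked allocated, i.e. a 3 * 128 = 384 byte block
 * at byte offset 5 * 128 into the chunk. */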
471
472
473static void *iemExecMemAllocatorAllocInChunk(PIEMEXECMEMALLOCATOR pExecMemAllocator, uint32_t idxChunk, uint32_t cbReq)
474{
475#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
476 /*
477 * Figure out how much to allocate.
478 */
479 uint32_t const cReqUnits = (cbReq + IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1) >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
480 if (cReqUnits <= pExecMemAllocator->aChunks[idxChunk].cFreeUnits)
481 {
482 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
483 uint32_t const idxHint = pExecMemAllocator->aChunks[idxChunk].idxFreeHint & ~(uint32_t)63;
484 if (idxHint + cReqUnits <= pExecMemAllocator->cUnitsPerChunk)
485 {
486 void *pvRet = iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, idxHint,
487 pExecMemAllocator->cUnitsPerChunk - idxHint, cReqUnits, idxChunk);
488 if (pvRet)
489 return pvRet;
490 }
491 return iemExecMemAllocatorAllocInChunkInt(pExecMemAllocator, pbmAlloc, 0,
492 RT_MIN(pExecMemAllocator->cUnitsPerChunk, RT_ALIGN_32(idxHint + cReqUnits, 64)),
493 cReqUnits, idxChunk);
494 }
495#else
496 void *pvRet = RTHeapSimpleAlloc(pExecMemAllocator->aChunks[idxChunk].hHeap, cbReq, 32);
497 if (pvRet)
498 return iemExecMemAllocatorAllocTailCode(pExecMemAllocator, pvRet, cbReq, idxChunk);
499#endif
500 return NULL;
501
502}
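/* Note on the search order above: the first pass scans from the 64-bit aligned
 * free hint to the end of the chunk; only if that fails do we rescan from the
 * start of the chunk, and then only up to roughly idxHint + cReqUnits (aligned
 * up to 64), since everything beyond that point was already covered by the
 * first pass. */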
503
504
505/**
506 * Allocates @a cbReq bytes of executable memory.
507 *
508 * @returns Pointer to the memory, NULL if out of memory or other problem
509 * encountered.
510 * @param pVCpu The cross context virtual CPU structure of the calling
511 * thread.
512 * @param cbReq How many bytes are required.
513 */
514static void *iemExecMemAllocatorAlloc(PVMCPU pVCpu, uint32_t cbReq)
515{
516 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
517 AssertReturn(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC, NULL);
518 AssertMsgReturn(cbReq > 32 && cbReq < _512K, ("%#x\n", cbReq), NULL);
519
520
521 for (unsigned iIteration = 0;; iIteration++)
522 {
523 /*
524 * Adjust the request size so it'll fit the allocator alignment/whatnot.
525 *
526 * For the RTHeapSimple allocator this means to follow the logic described
527 * in iemExecMemAllocatorGrow and attempt to allocate it from one of the
528 * existing chunks if we think we've got sufficient free memory around.
529 *
530 * While for the alternative one we just align it up to a whole unit size.
531 */
532#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
533 cbReq = RT_ALIGN_32(cbReq, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
534#else
535 cbReq = RT_ALIGN_32(cbReq + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
536#endif
537 if (cbReq <= pExecMemAllocator->cbFree)
538 {
539 uint32_t const cChunks = pExecMemAllocator->cChunks;
540 uint32_t const idxChunkHint = pExecMemAllocator->idxChunkHint < cChunks ? pExecMemAllocator->idxChunkHint : 0;
541 for (uint32_t idxChunk = idxChunkHint; idxChunk < cChunks; idxChunk++)
542 {
543 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
544 if (pvRet)
545 return pvRet;
546 }
547 for (uint32_t idxChunk = 0; idxChunk < idxChunkHint; idxChunk++)
548 {
549 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
550 if (pvRet)
551 return pvRet;
552 }
553 }
554
555 /*
556 * Can we grow it with another chunk?
557 */
558 if (pExecMemAllocator->cChunks < pExecMemAllocator->cMaxChunks)
559 {
560 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
561 AssertLogRelRCReturn(rc, NULL);
562
563 uint32_t const idxChunk = pExecMemAllocator->cChunks - 1;
564 void *pvRet = iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbReq);
565 if (pvRet)
566 return pvRet;
567 AssertFailed();
568 }
569
570 /*
571 * Try prune native TBs once.
572 */
573 if (iIteration == 0)
574 iemTbAllocatorFreeupNativeSpace(pVCpu, cbReq / sizeof(IEMNATIVEINSTR));
575 else
576 {
577 /** @todo stats... */
578 return NULL;
579 }
580 }
581
582}
583
584
585/** This is a hook that we may need later for changing memory protection back
586 * to readonly+exec */
587static void iemExecMemAllocatorReadyForUse(PVMCPUCC pVCpu, void *pv, size_t cb)
588{
589#ifdef RT_OS_DARWIN
590 /* See iemExecMemAllocatorAllocTailCode for the explanation. */
591 int rc = RTMemProtect(pv, cb, RTMEM_PROT_EXEC | RTMEM_PROT_READ);
592 AssertRC(rc); RT_NOREF(pVCpu);
593
594 /*
595 * Flush the instruction cache:
596 * https://developer.apple.com/documentation/apple-silicon/porting-just-in-time-compilers-to-apple-silicon
597 */
598 /* sys_dcache_flush(pv, cb); - not necessary */
599 sys_icache_invalidate(pv, cb);
600#else
601 RT_NOREF(pVCpu, pv, cb);
602#endif
603}
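/* For illustration, a sketch of the intended calling pattern (the real caller
 * is the native recompiler; cbCode and pvTmpCodeBuf are placeholders):
 *
 *     void *pvCode = iemExecMemAllocatorAlloc(pVCpu, cbCode);
 *     if (pvCode)
 *     {
 *         memcpy(pvCode, pvTmpCodeBuf, cbCode);                  // write while still RW (darwin W^X)
 *         iemExecMemAllocatorReadyForUse(pVCpu, pvCode, cbCode); // flip to R+X, flush the icache
 *         // ... execute, and eventually:
 *         iemExecMemAllocatorFree(pVCpu, pvCode, cbCode);
 *     }
 */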
604
605
606/**
607 * Frees executable memory.
608 */
609void iemExecMemAllocatorFree(PVMCPU pVCpu, void *pv, size_t cb)
610{
611 PIEMEXECMEMALLOCATOR pExecMemAllocator = pVCpu->iem.s.pExecMemAllocatorR3;
612 Assert(pExecMemAllocator && pExecMemAllocator->uMagic == IEMEXECMEMALLOCATOR_MAGIC);
613 Assert(pv);
614#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
615 Assert(!((uintptr_t)pv & (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE - 1)));
616#else
617 Assert(!((uintptr_t)pv & 63));
618#endif
619
620 /* Align the size as we did when allocating the block. */
621#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
622 cb = RT_ALIGN_Z(cb, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
623#else
624 cb = RT_ALIGN_Z(cb + pExecMemAllocator->cbHeapBlockHdr, 64) - pExecMemAllocator->cbHeapBlockHdr;
625#endif
626
627 /* Free it / assert sanity. */
628#if defined(VBOX_STRICT) || defined(IEMEXECMEM_USE_ALT_SUB_ALLOCATOR)
629 uint32_t const cChunks = pExecMemAllocator->cChunks;
630 uint32_t const cbChunk = pExecMemAllocator->cbChunk;
631 bool fFound = false;
632 for (uint32_t idxChunk = 0; idxChunk < cChunks; idxChunk++)
633 {
634 uintptr_t const offChunk = (uintptr_t)pv - (uintptr_t)pExecMemAllocator->aChunks[idxChunk].pvChunk;
635 fFound = offChunk < cbChunk;
636 if (fFound)
637 {
638#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
639 uint32_t const idxFirst = (uint32_t)offChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
640 uint32_t const cReqUnits = (uint32_t)cb >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
641
642 /* Check that it's valid and free it. */
643 uint64_t * const pbmAlloc = &pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk];
644 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst));
645 for (uint32_t i = 1; i < cReqUnits; i++)
646 AssertReturnVoid(ASMBitTest(pbmAlloc, idxFirst + i));
647 ASMBitClearRange(pbmAlloc, idxFirst, idxFirst + cReqUnits);
648
649 pExecMemAllocator->aChunks[idxChunk].cFreeUnits += cReqUnits;
650 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = idxFirst;
651
652 /* Update the stats. */
653 pExecMemAllocator->cbAllocated -= cb;
654 pExecMemAllocator->cbFree += cb;
655 pExecMemAllocator->cAllocations -= 1;
656 return;
657#else
658 Assert(RTHeapSimpleSize(pExecMemAllocator->aChunks[idxChunk].hHeap, pv) == cb);
659 break;
660#endif
661 }
662 }
663# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
664 AssertFailed();
665# else
666 Assert(fFound);
667# endif
668#endif
669
670#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
671 /* Update stats while cb is freshly calculated. */
672 pExecMemAllocator->cbAllocated -= cb;
673 pExecMemAllocator->cbFree += RT_ALIGN_Z(cb, 64);
674 pExecMemAllocator->cAllocations -= 1;
675
676 /* Free it. */
677 RTHeapSimpleFree(NIL_RTHEAPSIMPLE, pv);
678#endif
679}
680
681
682
683#ifdef IN_RING3
684# ifdef RT_OS_WINDOWS
685
686/**
687 * Initializes the unwind info structures for windows hosts.
688 */
689static int
690iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
691 void *pvChunk, uint32_t idxChunk)
692{
693 RT_NOREF(pVCpu);
694
695 /*
696 * The AMD64 unwind opcodes.
697 *
698 * This is a program that starts with RSP after a RET instruction that
699 * ends up in recompiled code, and the operations we describe here will
700 * restore all non-volatile registers and bring RSP back to where our
701 * RET address is. This means it's reverse order from what happens in
702 * the prologue.
703 *
704 * Note! Using a frame register approach here, both because we have one
705 * and mainly because the UWOP_ALLOC_LARGE argument values
706 * would be a pain to write initializers for. On the positive
707 * side, we're impervious to changes in the stack variable
708 * area and can deal with dynamic stack allocations if necessary.
709 */
710 static const IMAGE_UNWIND_CODE s_aOpcodes[] =
711 {
712 { { 16, IMAGE_AMD64_UWOP_SET_FPREG, 0 } }, /* RSP = RBP - FrameOffset * 10 (0x60) */
713 { { 16, IMAGE_AMD64_UWOP_ALLOC_SMALL, 0 } }, /* RSP += 8; */
714 { { 14, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x15 } }, /* R15 = [RSP]; RSP += 8; */
715 { { 12, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x14 } }, /* R14 = [RSP]; RSP += 8; */
716 { { 10, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x13 } }, /* R13 = [RSP]; RSP += 8; */
717 { { 8, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_x12 } }, /* R12 = [RSP]; RSP += 8; */
718 { { 7, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xDI } }, /* RDI = [RSP]; RSP += 8; */
719 { { 6, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xSI } }, /* RSI = [RSP]; RSP += 8; */
720 { { 5, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBX } }, /* RBX = [RSP]; RSP += 8; */
721 { { 4, IMAGE_AMD64_UWOP_PUSH_NONVOL, X86_GREG_xBP } }, /* RBP = [RSP]; RSP += 8; */
722 };
723 union
724 {
725 IMAGE_UNWIND_INFO Info;
726 uint8_t abPadding[RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes) + 16];
727 } s_UnwindInfo =
728 {
729 {
730 /* .Version = */ 1,
731 /* .Flags = */ 0,
732 /* .SizeOfProlog = */ 16, /* whatever */
733 /* .CountOfCodes = */ RT_ELEMENTS(s_aOpcodes),
734 /* .FrameRegister = */ X86_GREG_xBP,
735 /* .FrameOffset = */ (-IEMNATIVE_FP_OFF_LAST_PUSH + 8) / 16 /* we're off by one slot. sigh. */,
736 }
737 };
738 AssertCompile(-IEMNATIVE_FP_OFF_LAST_PUSH < 240 && -IEMNATIVE_FP_OFF_LAST_PUSH > 0);
739 AssertCompile((-IEMNATIVE_FP_OFF_LAST_PUSH & 0xf) == 8);
740
741 /*
742 * Calc how much space we need and allocate it off the exec heap.
743 */
744 unsigned const cFunctionEntries = 1;
745 unsigned const cbUnwindInfo = sizeof(s_aOpcodes) + RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes);
746 unsigned const cbNeeded = sizeof(IMAGE_RUNTIME_FUNCTION_ENTRY) * cFunctionEntries + cbUnwindInfo;
747# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
748 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
749 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions
750 = (PIMAGE_RUNTIME_FUNCTION_ENTRY)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
751# else
752 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
753 - pExecMemAllocator->cbHeapBlockHdr;
754 PIMAGE_RUNTIME_FUNCTION_ENTRY const paFunctions = (PIMAGE_RUNTIME_FUNCTION_ENTRY)RTHeapSimpleAlloc(hHeap, cbNeededAligned,
755 32 /*cbAlignment*/);
756# endif
757 AssertReturn(paFunctions, VERR_INTERNAL_ERROR_5);
758 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = paFunctions;
759
760 /*
761 * Initialize the structures.
762 */
763 PIMAGE_UNWIND_INFO const pInfo = (PIMAGE_UNWIND_INFO)&paFunctions[cFunctionEntries];
764
765 paFunctions[0].BeginAddress = 0;
766 paFunctions[0].EndAddress = pExecMemAllocator->cbChunk;
767 paFunctions[0].UnwindInfoAddress = (uint32_t)((uintptr_t)pInfo - (uintptr_t)pvChunk);
768
769 memcpy(pInfo, &s_UnwindInfo, RT_UOFFSETOF(IMAGE_UNWIND_INFO, aOpcodes));
770 memcpy(&pInfo->aOpcodes[0], s_aOpcodes, sizeof(s_aOpcodes));
771
772 /*
773 * Register it.
774 */
775 uint8_t fRet = RtlAddFunctionTable(paFunctions, cFunctionEntries, (uintptr_t)pvChunk);
776 AssertReturn(fRet, VERR_INTERNAL_ERROR_3); /* Nothing to clean up on failure, since it's within the chunk itself. */
777
778 return VINF_SUCCESS;
779}
780
781
782# else /* !RT_OS_WINDOWS */
783
784/**
785 * Emits a LEB128 encoded value between -0x2000 and 0x2000 (both exclusive).
786 */
787DECLINLINE(RTPTRUNION) iemDwarfPutLeb128(RTPTRUNION Ptr, int32_t iValue)
788{
789 if (iValue >= 64)
790 {
791 Assert(iValue < 0x2000);
792 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
793 *Ptr.pb++ = (uint8_t)(iValue >> 7) & 0x3f;
794 }
795 else if (iValue >= 0)
796 *Ptr.pb++ = (uint8_t)iValue;
797 else if (iValue > -64)
798 *Ptr.pb++ = ((uint8_t)iValue & 0x3f) | 0x40;
799 else
800 {
801 Assert(iValue > -0x2000);
802 *Ptr.pb++ = ((uint8_t)iValue & 0x7f) | 0x80;
803 *Ptr.pb++ = ((uint8_t)(iValue >> 7) & 0x3f) | 0x40;
804 }
805 return Ptr;
806}
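/* Worked examples for the signed LEB128 encoder above: iValue = -8 encodes as
 * the single byte 0x78 ((0xf8 & 0x3f) | 0x40), while iValue = 100 needs two
 * bytes, 0xe4 0x00 (low 7 bits with the continuation bit set, then the
 * remaining bits). */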
807
808
809/**
810 * Emits an ULEB128 encoded value (up to 64-bit wide).
811 */
812DECLINLINE(RTPTRUNION) iemDwarfPutUleb128(RTPTRUNION Ptr, uint64_t uValue)
813{
814 while (uValue >= 0x80)
815 {
816 *Ptr.pb++ = ((uint8_t)uValue & 0x7f) | 0x80;
817 uValue >>= 7;
818 }
819 *Ptr.pb++ = (uint8_t)uValue;
820 return Ptr;
821}
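/* Worked example for the unsigned LEB128 encoder above: uValue = 300 (0x12c)
 * encodes as 0xac 0x02: the low 7 bits (0x2c) with the continuation bit set,
 * followed by the remaining bits (0x02). */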
822
823
824/**
825 * Emits a CFA rule as register @a uReg + offset @a off.
826 */
827DECLINLINE(RTPTRUNION) iemDwarfPutCfaDefCfa(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
828{
829 *Ptr.pb++ = DW_CFA_def_cfa;
830 Ptr = iemDwarfPutUleb128(Ptr, uReg);
831 Ptr = iemDwarfPutUleb128(Ptr, off);
832 return Ptr;
833}
834
835
836/**
837 * Emits a register (@a uReg) save location:
838 * CFA + @a off * data_alignment_factor
839 */
840DECLINLINE(RTPTRUNION) iemDwarfPutCfaOffset(RTPTRUNION Ptr, uint32_t uReg, uint32_t off)
841{
842 if (uReg < 0x40)
843 *Ptr.pb++ = DW_CFA_offset | uReg;
844 else
845 {
846 *Ptr.pb++ = DW_CFA_offset_extended;
847 Ptr = iemDwarfPutUleb128(Ptr, uReg);
848 }
849 Ptr = iemDwarfPutUleb128(Ptr, off);
850 return Ptr;
851}
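/* For illustration: with the data alignment factor of -8 emitted in the CIE
 * below, iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2) produces
 * DW_CFA_offset | DWREG_AMD64_RBP followed by ULEB128(2), i.e. "RBP was
 * saved at CFA + 2 * -8 = CFA - 16". */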
852
853
854# if 0 /* unused */
855/**
856 * Emits a register (@a uReg) save location, using signed offset:
857 * CFA + @a offSigned * data_alignment_factor
858 */
859DECLINLINE(RTPTRUNION) iemDwarfPutCfaSignedOffset(RTPTRUNION Ptr, uint32_t uReg, int32_t offSigned)
860{
861 *Ptr.pb++ = DW_CFA_offset_extended_sf;
862 Ptr = iemDwarfPutUleb128(Ptr, uReg);
863 Ptr = iemDwarfPutLeb128(Ptr, offSigned);
864 return Ptr;
865}
866# endif
867
868
869/**
870 * Initializes the unwind info section for non-windows hosts.
871 */
872static int
873iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator,
874 void *pvChunk, uint32_t idxChunk)
875{
876 PIEMEXECMEMCHUNKEHFRAME const pEhFrame = &pExecMemAllocator->paEhFrames[idxChunk];
877 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = pEhFrame; /* not necessary, but whatever */
878
879 RTPTRUNION Ptr = { pEhFrame->abEhFrame };
880
881 /*
882 * Generate the CIE first.
883 */
884# ifdef IEMNATIVE_USE_LIBUNWIND /* libunwind (llvm, darwin) only supports v1 and v3. */
885 uint8_t const iDwarfVer = 3;
886# else
887 uint8_t const iDwarfVer = 4;
888# endif
889 RTPTRUNION const PtrCie = Ptr;
890 *Ptr.pu32++ = 123; /* The CIE length will be determined later. */
891 *Ptr.pu32++ = 0 /*UINT32_MAX*/; /* I'm a CIE in .eh_frame speak. */
892 *Ptr.pb++ = iDwarfVer; /* DWARF version */
893 *Ptr.pb++ = 0; /* Augmentation. */
894 if (iDwarfVer >= 4)
895 {
896 *Ptr.pb++ = sizeof(uintptr_t); /* Address size. */
897 *Ptr.pb++ = 0; /* Segment selector size. */
898 }
899# ifdef RT_ARCH_AMD64
900 Ptr = iemDwarfPutLeb128(Ptr, 1); /* Code alignment factor (LEB128 = 1). */
901# else
902 Ptr = iemDwarfPutLeb128(Ptr, 4); /* Code alignment factor (LEB128 = 4). */
903# endif
904 Ptr = iemDwarfPutLeb128(Ptr, -8); /* Data alignment factor (LEB128 = -8). */
905# ifdef RT_ARCH_AMD64
906 Ptr = iemDwarfPutUleb128(Ptr, DWREG_AMD64_RA); /* Return address column (ULEB128) */
907# elif defined(RT_ARCH_ARM64)
908 Ptr = iemDwarfPutUleb128(Ptr, DWREG_ARM64_LR); /* Return address column (ULEB128) */
909# else
910# error "port me"
911# endif
912 /* Initial instructions: */
913# ifdef RT_ARCH_AMD64
914 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_AMD64_RBP, 16); /* CFA = RBP + 0x10 - first stack parameter */
915 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RA, 1); /* Ret RIP = [CFA + 1*-8] */
916 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBP, 2); /* RBP = [CFA + 2*-8] */
917 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_RBX, 3); /* RBX = [CFA + 3*-8] */
918 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R12, 4); /* R12 = [CFA + 4*-8] */
919 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R13, 5); /* R13 = [CFA + 5*-8] */
920 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R14, 6); /* R14 = [CFA + 6*-8] */
921 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_AMD64_R15, 7); /* R15 = [CFA + 7*-8] */
922# elif defined(RT_ARCH_ARM64)
923# if 1
924 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_BP, 16); /* CFA = BP + 0x10 - first stack parameter */
925# else
926 Ptr = iemDwarfPutCfaDefCfa(Ptr, DWREG_ARM64_SP, IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_SAVE_REG_SIZE);
927# endif
928 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_LR, 1); /* Ret PC = [CFA + 1*-8] */
929 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_BP, 2); /* Ret BP = [CFA + 2*-8] */
930 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X28, 3); /* X28 = [CFA + 3*-8] */
931 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X27, 4); /* X27 = [CFA + 4*-8] */
932 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X26, 5); /* X26 = [CFA + 5*-8] */
933 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X25, 6); /* X25 = [CFA + 6*-8] */
934 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X24, 7); /* X24 = [CFA + 7*-8] */
935 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X23, 8); /* X23 = [CFA + 8*-8] */
936 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X22, 9); /* X22 = [CFA + 9*-8] */
937 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X21, 10); /* X21 = [CFA +10*-8] */
938 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X20, 11); /* X20 = [CFA +11*-8] */
939 Ptr = iemDwarfPutCfaOffset(Ptr, DWREG_ARM64_X19, 12); /* X19 = [CFA +12*-8] */
940 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
941 /** @todo do we need to do something about clearing DWREG_ARM64_RA_SIGN_STATE or something? */
942# else
943# error "port me"
944# endif
945 while ((Ptr.u - PtrCie.u) & 3)
946 *Ptr.pb++ = DW_CFA_nop;
947 /* Finalize the CIE size. */
948 *PtrCie.pu32 = Ptr.u - PtrCie.u - sizeof(uint32_t);
949
950 /*
951 * Generate an FDE for the whole chunk area.
952 */
953# ifdef IEMNATIVE_USE_LIBUNWIND
954 pEhFrame->offFda = Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0];
955# endif
956 RTPTRUNION const PtrFde = Ptr;
957 *Ptr.pu32++ = 123; /* The FDE length will be determined later. */
958 *Ptr.pu32 = Ptr.u - PtrCie.u; /* Negated self relative CIE address. */
959 Ptr.pu32++;
960 *Ptr.pu64++ = (uintptr_t)pvChunk; /* Absolute start PC of this FDE. */
961 *Ptr.pu64++ = pExecMemAllocator->cbChunk; /* PC range length for this FDE. */
962# if 0 /* not required for recent libunwind.dylib nor recent libgcc/glibc. */
963 *Ptr.pb++ = DW_CFA_nop;
964# endif
965 while ((Ptr.u - PtrFde.u) & 3)
966 *Ptr.pb++ = DW_CFA_nop;
967 /* Finalize the FDE size. */
968 *PtrFde.pu32 = Ptr.u - PtrFde.u - sizeof(uint32_t);
969
970 /* Terminator entry. */
971 *Ptr.pu32++ = 0;
972 *Ptr.pu32++ = 0; /* just to be sure... */
973 Assert(Ptr.u - (uintptr_t)&pEhFrame->abEhFrame[0] <= sizeof(pEhFrame->abEhFrame));
974
975 /*
976 * Register it.
977 */
978# ifdef IEMNATIVE_USE_LIBUNWIND
979 __register_frame(&pEhFrame->abEhFrame[pEhFrame->offFda]);
980# else
981 memset(pEhFrame->abObject, 0xf6, sizeof(pEhFrame->abObject)); /* color the memory to better spot usage */
982 __register_frame_info(pEhFrame->abEhFrame, pEhFrame->abObject);
983# endif
984
985# ifdef IEMNATIVE_USE_GDB_JIT
986 /*
987 * Now for telling GDB about this (experimental).
988 *
989 * This seems to work best with ET_DYN.
990 */
991 unsigned const cbNeeded = sizeof(GDBJITSYMFILE);
992# ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
993 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded, IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SIZE);
994 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)iemExecMemAllocatorAllocInChunk(pExecMemAllocator, idxChunk, cbNeededAligned);
995# else
996 unsigned const cbNeededAligned = RT_ALIGN_32(cbNeeded + pExecMemAllocator->cbHeapBlockHdr, 64)
997 - pExecMemAllocator->cbHeapBlockHdr;
998 GDBJITSYMFILE * const pSymFile = (GDBJITSYMFILE *)RTHeapSimpleAlloc(hHeap, cbNeededAligned, 32 /*cbAlignment*/);
999# endif
1000 AssertReturn(pSymFile, VERR_INTERNAL_ERROR_5);
1001 unsigned const offSymFileInChunk = (uintptr_t)pSymFile - (uintptr_t)pvChunk;
1002
1003 RT_ZERO(*pSymFile);
1004
1005 /*
1006 * The ELF header:
1007 */
1008 pSymFile->EHdr.e_ident[0] = ELFMAG0;
1009 pSymFile->EHdr.e_ident[1] = ELFMAG1;
1010 pSymFile->EHdr.e_ident[2] = ELFMAG2;
1011 pSymFile->EHdr.e_ident[3] = ELFMAG3;
1012 pSymFile->EHdr.e_ident[EI_VERSION] = EV_CURRENT;
1013 pSymFile->EHdr.e_ident[EI_CLASS] = ELFCLASS64;
1014 pSymFile->EHdr.e_ident[EI_DATA] = ELFDATA2LSB;
1015 pSymFile->EHdr.e_ident[EI_OSABI] = ELFOSABI_NONE;
1016# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1017 pSymFile->EHdr.e_type = ET_DYN;
1018# else
1019 pSymFile->EHdr.e_type = ET_REL;
1020# endif
1021# ifdef RT_ARCH_AMD64
1022 pSymFile->EHdr.e_machine = EM_AMD64;
1023# elif defined(RT_ARCH_ARM64)
1024 pSymFile->EHdr.e_machine = EM_AARCH64;
1025# else
1026# error "port me"
1027# endif
1028 pSymFile->EHdr.e_version = 1; /*?*/
1029 pSymFile->EHdr.e_entry = 0;
1030# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1031 pSymFile->EHdr.e_phoff = RT_UOFFSETOF(GDBJITSYMFILE, aPhdrs);
1032# else
1033 pSymFile->EHdr.e_phoff = 0;
1034# endif
1035 pSymFile->EHdr.e_shoff = sizeof(pSymFile->EHdr);
1036 pSymFile->EHdr.e_flags = 0;
1037 pSymFile->EHdr.e_ehsize = sizeof(pSymFile->EHdr);
1038# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1039 pSymFile->EHdr.e_phentsize = sizeof(pSymFile->aPhdrs[0]);
1040 pSymFile->EHdr.e_phnum = RT_ELEMENTS(pSymFile->aPhdrs);
1041# else
1042 pSymFile->EHdr.e_phentsize = 0;
1043 pSymFile->EHdr.e_phnum = 0;
1044# endif
1045 pSymFile->EHdr.e_shentsize = sizeof(pSymFile->aShdrs[0]);
1046 pSymFile->EHdr.e_shnum = RT_ELEMENTS(pSymFile->aShdrs);
1047 pSymFile->EHdr.e_shstrndx = 0; /* set later */
1048
1049 uint32_t offStrTab = 0;
1050#define APPEND_STR(a_szStr) do { \
1051 memcpy(&pSymFile->szzStrTab[offStrTab], a_szStr, sizeof(a_szStr)); \
1052 offStrTab += sizeof(a_szStr); \
1053 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1054 } while (0)
1055#define APPEND_STR_FMT(a_szStr, ...) do { \
1056 offStrTab += RTStrPrintf(&pSymFile->szzStrTab[offStrTab], sizeof(pSymFile->szzStrTab) - offStrTab, a_szStr, __VA_ARGS__); \
1057 offStrTab++; \
1058 Assert(offStrTab < sizeof(pSymFile->szzStrTab)); \
1059 } while (0)
1060
1061 /*
1062 * Section headers.
1063 */
1064 /* Section header #0: NULL */
1065 unsigned i = 0;
1066 APPEND_STR("");
1067 RT_ZERO(pSymFile->aShdrs[i]);
1068 i++;
1069
1070 /* Section header: .eh_frame */
1071 pSymFile->aShdrs[i].sh_name = offStrTab;
1072 APPEND_STR(".eh_frame");
1073 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1074 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1075# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1076 pSymFile->aShdrs[i].sh_offset
1077 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, abEhFrame);
1078# else
1079 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->abEhFrame[0];
1080 pSymFile->aShdrs[i].sh_offset = 0;
1081# endif
1082
1083 pSymFile->aShdrs[i].sh_size = sizeof(pEhFrame->abEhFrame);
1084 pSymFile->aShdrs[i].sh_link = 0;
1085 pSymFile->aShdrs[i].sh_info = 0;
1086 pSymFile->aShdrs[i].sh_addralign = 1;
1087 pSymFile->aShdrs[i].sh_entsize = 0;
1088 memcpy(pSymFile->abEhFrame, pEhFrame->abEhFrame, sizeof(pEhFrame->abEhFrame));
1089 i++;
1090
1091 /* Section header: .shstrtab */
1092 unsigned const iShStrTab = i;
1093 pSymFile->EHdr.e_shstrndx = iShStrTab;
1094 pSymFile->aShdrs[i].sh_name = offStrTab;
1095 APPEND_STR(".shstrtab");
1096 pSymFile->aShdrs[i].sh_type = SHT_STRTAB;
1097 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1098# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1099 pSymFile->aShdrs[i].sh_offset
1100 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1101# else
1102 pSymFile->aShdrs[i].sh_addr = (uintptr_t)&pSymFile->szzStrTab[0];
1103 pSymFile->aShdrs[i].sh_offset = 0;
1104# endif
1105 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->szzStrTab);
1106 pSymFile->aShdrs[i].sh_link = 0;
1107 pSymFile->aShdrs[i].sh_info = 0;
1108 pSymFile->aShdrs[i].sh_addralign = 1;
1109 pSymFile->aShdrs[i].sh_entsize = 0;
1110 i++;
1111
1112 /* Section header: .symtab */
1113 pSymFile->aShdrs[i].sh_name = offStrTab;
1114 APPEND_STR(".symtab");
1115 pSymFile->aShdrs[i].sh_type = SHT_SYMTAB;
1116 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1117 pSymFile->aShdrs[i].sh_offset
1118 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aSymbols);
1119 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aSymbols);
1120 pSymFile->aShdrs[i].sh_link = iShStrTab;
1121 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aSymbols);
1122 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aSymbols[0].st_value);
1123 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aSymbols[0]);
1124 i++;
1125
1126# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1127 /* Section header: .dynsym */
1128 pSymFile->aShdrs[i].sh_name = offStrTab;
1129 APPEND_STR(".dynsym");
1130 pSymFile->aShdrs[i].sh_type = SHT_DYNSYM;
1131 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1132 pSymFile->aShdrs[i].sh_offset
1133 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1134 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDynSyms);
1135 pSymFile->aShdrs[i].sh_link = iShStrTab;
1136 pSymFile->aShdrs[i].sh_info = RT_ELEMENTS(pSymFile->aDynSyms);
1137 pSymFile->aShdrs[i].sh_addralign = sizeof(pSymFile->aDynSyms[0].st_value);
1138 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDynSyms[0]);
1139 i++;
1140# endif
1141
1142# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1143 /* Section header: .dynamic */
1144 pSymFile->aShdrs[i].sh_name = offStrTab;
1145 APPEND_STR(".dynamic");
1146 pSymFile->aShdrs[i].sh_type = SHT_DYNAMIC;
1147 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC;
1148 pSymFile->aShdrs[i].sh_offset
1149 = pSymFile->aShdrs[i].sh_addr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1150 pSymFile->aShdrs[i].sh_size = sizeof(pSymFile->aDyn);
1151 pSymFile->aShdrs[i].sh_link = iShStrTab;
1152 pSymFile->aShdrs[i].sh_info = 0;
1153 pSymFile->aShdrs[i].sh_addralign = 1;
1154 pSymFile->aShdrs[i].sh_entsize = sizeof(pSymFile->aDyn[0]);
1155 i++;
1156# endif
1157
1158 /* Section header: .text */
1159 unsigned const iShText = i;
1160 pSymFile->aShdrs[i].sh_name = offStrTab;
1161 APPEND_STR(".text");
1162 pSymFile->aShdrs[i].sh_type = SHT_PROGBITS;
1163 pSymFile->aShdrs[i].sh_flags = SHF_ALLOC | SHF_EXECINSTR;
1164# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN) || defined(IEMNATIVE_USE_GDB_JIT_ELF_RVAS)
1165 pSymFile->aShdrs[i].sh_offset
1166 = pSymFile->aShdrs[i].sh_addr = sizeof(GDBJITSYMFILE);
1167# else
1168 pSymFile->aShdrs[i].sh_addr = (uintptr_t)(pSymFile + 1);
1169 pSymFile->aShdrs[i].sh_offset = 0;
1170# endif
1171 pSymFile->aShdrs[i].sh_size = pExecMemAllocator->cbChunk - offSymFileInChunk - sizeof(GDBJITSYMFILE);
1172 pSymFile->aShdrs[i].sh_link = 0;
1173 pSymFile->aShdrs[i].sh_info = 0;
1174 pSymFile->aShdrs[i].sh_addralign = 1;
1175 pSymFile->aShdrs[i].sh_entsize = 0;
1176 i++;
1177
1178 Assert(i == RT_ELEMENTS(pSymFile->aShdrs));
1179
1180# if defined(IEMNATIVE_USE_GDB_JIT_ET_DYN)
1181 /*
1182 * The program headers:
1183 */
1184 /* Everything in a single LOAD segment: */
1185 i = 0;
1186 pSymFile->aPhdrs[i].p_type = PT_LOAD;
1187 pSymFile->aPhdrs[i].p_flags = PF_X | PF_R;
1188 pSymFile->aPhdrs[i].p_offset
1189 = pSymFile->aPhdrs[i].p_vaddr
1190 = pSymFile->aPhdrs[i].p_paddr = 0;
1191 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1192 = pSymFile->aPhdrs[i].p_memsz = pExecMemAllocator->cbChunk - offSymFileInChunk;
1193 pSymFile->aPhdrs[i].p_align = HOST_PAGE_SIZE;
1194 i++;
1195 /* The .dynamic segment. */
1196 pSymFile->aPhdrs[i].p_type = PT_DYNAMIC;
1197 pSymFile->aPhdrs[i].p_flags = PF_R;
1198 pSymFile->aPhdrs[i].p_offset
1199 = pSymFile->aPhdrs[i].p_vaddr
1200 = pSymFile->aPhdrs[i].p_paddr = RT_UOFFSETOF(GDBJITSYMFILE, aDyn);
1201 pSymFile->aPhdrs[i].p_filesz /* Size of segment in file. */
1202 = pSymFile->aPhdrs[i].p_memsz = sizeof(pSymFile->aDyn);
1203 pSymFile->aPhdrs[i].p_align = sizeof(pSymFile->aDyn[0].d_tag);
1204 i++;
1205
1206 Assert(i == RT_ELEMENTS(pSymFile->aPhdrs));
1207
1208 /*
1209 * The dynamic section:
1210 */
1211 i = 0;
1212 pSymFile->aDyn[i].d_tag = DT_SONAME;
1213 pSymFile->aDyn[i].d_un.d_val = offStrTab;
1214 APPEND_STR_FMT("iem-exec-chunk-%u-%u", pVCpu->idCpu, idxChunk);
1215 i++;
1216 pSymFile->aDyn[i].d_tag = DT_STRTAB;
1217 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, szzStrTab);
1218 i++;
1219 pSymFile->aDyn[i].d_tag = DT_STRSZ;
1220 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->szzStrTab);
1221 i++;
1222 pSymFile->aDyn[i].d_tag = DT_SYMTAB;
1223 pSymFile->aDyn[i].d_un.d_ptr = RT_UOFFSETOF(GDBJITSYMFILE, aDynSyms);
1224 i++;
1225 pSymFile->aDyn[i].d_tag = DT_SYMENT;
1226 pSymFile->aDyn[i].d_un.d_val = sizeof(pSymFile->aDynSyms[0]);
1227 i++;
1228 pSymFile->aDyn[i].d_tag = DT_NULL;
1229 i++;
1230 Assert(i == RT_ELEMENTS(pSymFile->aDyn));
1231# endif /* IEMNATIVE_USE_GDB_JIT_ET_DYN */
1232
1233 /*
1234 * Symbol tables:
1235 */
1236 /** @todo gdb doesn't seem to really like this ... */
1237 i = 0;
1238 pSymFile->aSymbols[i].st_name = 0;
1239 pSymFile->aSymbols[i].st_shndx = SHN_UNDEF;
1240 pSymFile->aSymbols[i].st_value = 0;
1241 pSymFile->aSymbols[i].st_size = 0;
1242 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_NOTYPE);
1243 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1244# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1245 pSymFile->aDynSyms[0] = pSymFile->aSymbols[i];
1246# endif
1247 i++;
1248
1249 pSymFile->aSymbols[i].st_name = 0;
1250 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1251 pSymFile->aSymbols[i].st_value = 0;
1252 pSymFile->aSymbols[i].st_size = 0;
1253 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_LOCAL, STT_FILE);
1254 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1255 i++;
1256
1257 pSymFile->aSymbols[i].st_name = offStrTab;
1258 APPEND_STR_FMT("iem_exec_chunk_%u_%u", pVCpu->idCpu, idxChunk);
1259# if 0
1260 pSymFile->aSymbols[i].st_shndx = iShText;
1261 pSymFile->aSymbols[i].st_value = 0;
1262# else
1263 pSymFile->aSymbols[i].st_shndx = SHN_ABS;
1264 pSymFile->aSymbols[i].st_value = (uintptr_t)(pSymFile + 1);
1265# endif
1266 pSymFile->aSymbols[i].st_size = pSymFile->aShdrs[iShText].sh_size;
1267 pSymFile->aSymbols[i].st_info = ELF64_ST_INFO(STB_GLOBAL, STT_FUNC);
1268 pSymFile->aSymbols[i].st_other = 0 /* STV_DEFAULT */;
1269# ifdef IEMNATIVE_USE_GDB_JIT_ET_DYN
1270 pSymFile->aDynSyms[1] = pSymFile->aSymbols[i];
1271 pSymFile->aDynSyms[1].st_value = (uintptr_t)(pSymFile + 1);
1272# endif
1273 i++;
1274
1275 Assert(i == RT_ELEMENTS(pSymFile->aSymbols));
1276 Assert(offStrTab < sizeof(pSymFile->szzStrTab));
1277
1278 /*
1279 * The GDB JIT entry and informing GDB.
1280 */
1281 pEhFrame->GdbJitEntry.pbSymFile = (uint8_t *)pSymFile;
1282# if 1
1283 pEhFrame->GdbJitEntry.cbSymFile = pExecMemAllocator->cbChunk - ((uintptr_t)pSymFile - (uintptr_t)pvChunk);
1284# else
1285 pEhFrame->GdbJitEntry.cbSymFile = sizeof(GDBJITSYMFILE);
1286# endif
1287
1288 RTOnce(&g_IemNativeGdbJitOnce, iemNativeGdbJitInitOnce, NULL);
1289 RTCritSectEnter(&g_IemNativeGdbJitLock);
1290 pEhFrame->GdbJitEntry.pNext = NULL;
1291 pEhFrame->GdbJitEntry.pPrev = __jit_debug_descriptor.pTail;
1292 if (__jit_debug_descriptor.pTail)
1293 __jit_debug_descriptor.pTail->pNext = &pEhFrame->GdbJitEntry;
1294 else
1295 __jit_debug_descriptor.pHead = &pEhFrame->GdbJitEntry;
1296 __jit_debug_descriptor.pTail = &pEhFrame->GdbJitEntry;
1297 __jit_debug_descriptor.pRelevant = &pEhFrame->GdbJitEntry;
1298
1299 /* Notify GDB: */
1300 __jit_debug_descriptor.enmAction = kGdbJitaction_Register;
1301 __jit_debug_register_code();
1302 __jit_debug_descriptor.enmAction = kGdbJitaction_NoAction;
1303 RTCritSectLeave(&g_IemNativeGdbJitLock);
1304
1305# else /* !IEMNATIVE_USE_GDB_JIT */
1306 RT_NOREF(pVCpu);
1307# endif /* !IEMNATIVE_USE_GDB_JIT */
1308
1309 return VINF_SUCCESS;
1310}
1311
1312# endif /* !RT_OS_WINDOWS */
1313#endif /* IN_RING3 */
1314
1315
1316/**
1317 * Adds another chunk to the executable memory allocator.
1318 *
1319 * This is used by the init code for the initial allocation and later by the
1320 * regular allocator function when it's out of memory.
1321 */
1322static int iemExecMemAllocatorGrow(PVMCPUCC pVCpu, PIEMEXECMEMALLOCATOR pExecMemAllocator)
1323{
1324 /* Check that we've room for growth. */
1325 uint32_t const idxChunk = pExecMemAllocator->cChunks;
1326 AssertLogRelReturn(idxChunk < pExecMemAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1327
1328 /* Allocate a chunk. */
1329#ifdef RT_OS_DARWIN
1330 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, 0);
1331#else
1332 void *pvChunk = RTMemPageAllocEx(pExecMemAllocator->cbChunk, RTMEMPAGEALLOC_F_EXECUTABLE);
1333#endif
1334 AssertLogRelReturn(pvChunk, VERR_NO_EXEC_MEMORY);
1335
1336#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1337 int rc = VINF_SUCCESS;
1338#else
1339 /* Initialize the heap for the chunk. */
1340 RTHEAPSIMPLE hHeap = NIL_RTHEAPSIMPLE;
1341 int rc = RTHeapSimpleInit(&hHeap, pvChunk, pExecMemAllocator->cbChunk);
1342 AssertRC(rc);
1343 if (RT_SUCCESS(rc))
1344 {
1345 /*
1346 * We want the memory to be aligned on 64 byte, so the first time thru
1347 * here we do some exploratory allocations to see how we can achieve this.
1348 * On subsequent runs we only make an initial adjustment allocation, if
1349 * necessary.
1350 *
1351 * Since we own the heap implementation, we know that the internal block
1352 * header is 32 bytes in size for 64-bit systems (see RTHEAPSIMPLEBLOCK),
1353 * so all we need to do wrt allocation size adjustments is to add 32 bytes
1354 * to the size, align up by 64 bytes, and subtract 32 bytes.
1355 *
1356 * The heap anchor block is 8 * sizeof(void *) (see RTHEAPSIMPLEINTERNAL),
1357 * which means 64 bytes on a 64-bit system, so we need to make a 64 byte
1358 * allocation to force subsequent allocations to return 64 byte aligned
1359 * user areas.
1360 */
1361 if (!pExecMemAllocator->cbHeapBlockHdr)
1362 {
1363 pExecMemAllocator->cbHeapBlockHdr = sizeof(void *) * 4; /* See RTHEAPSIMPLEBLOCK. */
1364 pExecMemAllocator->cbHeapAlignTweak = 64;
1365 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak,
1366 32 /*cbAlignment*/);
1367 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_2);
1368
1369 void *pvTest1 = RTHeapSimpleAlloc(hHeap,
1370 RT_ALIGN_32(256 + pExecMemAllocator->cbHeapBlockHdr, 64)
1371 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1372 AssertStmt(pvTest1, rc = VERR_INTERNAL_ERROR_2);
1373 AssertStmt(!((uintptr_t)pvTest1 & 63), rc = VERR_INTERNAL_ERROR_3);
1374
1375 void *pvTest2 = RTHeapSimpleAlloc(hHeap,
1376 RT_ALIGN_32(687 + pExecMemAllocator->cbHeapBlockHdr, 64)
1377 - pExecMemAllocator->cbHeapBlockHdr, 32 /*cbAlignment*/);
1378 AssertStmt(pvTest2, rc = VERR_INTERNAL_ERROR_2);
1379 AssertStmt(!((uintptr_t)pvTest2 & 63), rc = VERR_INTERNAL_ERROR_3);
1380
1381 RTHeapSimpleFree(hHeap, pvTest2);
1382 RTHeapSimpleFree(hHeap, pvTest1);
1383 }
1384 else
1385 {
1386 pExecMemAllocator->pvAlignTweak = RTHeapSimpleAlloc(hHeap, pExecMemAllocator->cbHeapAlignTweak, 32 /*cbAlignment*/);
1387 AssertStmt(pExecMemAllocator->pvAlignTweak, rc = VERR_INTERNAL_ERROR_4);
1388 }
1389 if (RT_SUCCESS(rc))
1390#endif /* !IEMEXECMEM_USE_ALT_SUB_ALLOCATOR */
1391 {
1392 /*
1393 * Add the chunk.
1394 *
1395 * This must be done before the unwind init so windows can allocate
1396 * memory from the chunk when using the alternative sub-allocator.
1397 */
1398 pExecMemAllocator->aChunks[idxChunk].pvChunk = pvChunk;
1399#ifdef IN_RING3
1400 pExecMemAllocator->aChunks[idxChunk].pvUnwindInfo = NULL;
1401#endif
1402#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1403 pExecMemAllocator->aChunks[idxChunk].hHeap = hHeap;
1404#else
1405 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = pExecMemAllocator->cUnitsPerChunk;
1406 pExecMemAllocator->aChunks[idxChunk].idxFreeHint = 0;
1407 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1408 0, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1409#endif
1410
1411 pExecMemAllocator->cChunks = idxChunk + 1;
1412 pExecMemAllocator->idxChunkHint = idxChunk;
1413
1414#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1415 pExecMemAllocator->cbTotal += pExecMemAllocator->cbChunk;
1416 pExecMemAllocator->cbFree += pExecMemAllocator->cbChunk;
1417#else
1418 size_t const cbFree = RTHeapSimpleGetFreeSize(hHeap);
1419 pExecMemAllocator->cbTotal += cbFree;
1420 pExecMemAllocator->cbFree += cbFree;
1421#endif
1422
1423#ifdef IN_RING3
1424 /*
1425 * Initialize the unwind information (this cannot really fail atm).
1426 * (This sets pvUnwindInfo.)
1427 */
1428 rc = iemExecMemAllocatorInitAndRegisterUnwindInfoForChunk(pVCpu, pExecMemAllocator, pvChunk, idxChunk);
1429 if (RT_SUCCESS(rc))
1430#endif
1431 {
1432 return VINF_SUCCESS;
1433 }
1434
1435#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1436 /* Just in case the impossible happens, undo the above: */
1437 pExecMemAllocator->cbTotal -= pExecMemAllocator->cbChunk;
1438 pExecMemAllocator->cbFree -= pExecMemAllocator->aChunks[idxChunk].cFreeUnits << IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1439 pExecMemAllocator->cChunks = idxChunk;
1440 memset(&pExecMemAllocator->pbmAlloc[pExecMemAllocator->cBitmapElementsPerChunk * idxChunk],
1441 0xff, sizeof(pExecMemAllocator->pbmAlloc[0]) * pExecMemAllocator->cBitmapElementsPerChunk);
1442 pExecMemAllocator->aChunks[idxChunk].pvChunk = NULL;
1443 pExecMemAllocator->aChunks[idxChunk].cFreeUnits = 0;
1444#endif
1445 }
1446#ifndef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1447 }
1448#endif
1449 RTMemPageFree(pvChunk, pExecMemAllocator->cbChunk);
1450 RT_NOREF(pVCpu);
1451 return rc;
1452}
1453
1454
1455/**
1456 * Initializes the executable memory allocator for native recompilation on the
1457 * calling EMT.
1458 *
1459 * @returns VBox status code.
1460 * @param pVCpu The cross context virtual CPU structure of the calling
1461 * thread.
1462 * @param cbMax The max size of the allocator.
1463 * @param cbInitial The initial allocator size.
1464 * @param cbChunk The chunk size, 0 or UINT32_MAX for default (@a cbMax
1465 * dependent).
1466 */
1467int iemExecMemAllocatorInit(PVMCPU pVCpu, uint64_t cbMax, uint64_t cbInitial, uint32_t cbChunk)
1468{
1469 /*
1470 * Validate input.
1471 */
1472 AssertLogRelMsgReturn(cbMax >= _1M && cbMax <= _4G+_4G, ("cbMax=%RU64 (%RX64)\n", cbMax, cbMax), VERR_OUT_OF_RANGE);
1473 AssertReturn(cbInitial <= cbMax, VERR_OUT_OF_RANGE);
1474 AssertLogRelMsgReturn( cbChunk == UINT32_MAX
1475 || cbChunk == 0
1476 || ( RT_IS_POWER_OF_TWO(cbChunk)
1477 && cbChunk >= _1M
1478 && cbChunk <= _256M
1479 && cbChunk <= cbMax),
1480 ("cbChunk=%RU32 (%RX32) cbMax=%RU64\n", cbChunk, cbChunk, cbMax),
1481 VERR_OUT_OF_RANGE);
1482
1483 /*
1484 * Adjust/figure out the chunk size.
1485 */
1486 if (cbChunk == 0 || cbChunk == UINT32_MAX)
1487 {
1488 if (cbMax >= _256M)
1489 cbChunk = _64M;
1490 else
1491 {
1492 if (cbMax < _16M)
1493 cbChunk = cbMax >= _4M ? _4M : (uint32_t)cbMax;
1494 else
1495 cbChunk = (uint32_t)cbMax / 4;
1496 if (!RT_IS_POWER_OF_TWO(cbChunk))
1497 cbChunk = RT_BIT_32(ASMBitLastSetU32(cbChunk));
1498 }
1499 }
1500
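    /* Round cbMax up to a whole number of chunks so the cMaxChunks calculation below covers it exactly. */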
1501 if (cbChunk > cbMax)
1502 cbMax = cbChunk;
1503 else
1504 cbMax = (cbMax - 1 + cbChunk) / cbChunk * cbChunk;
1505 uint32_t const cMaxChunks = (uint32_t)(cbMax / cbChunk);
1506 AssertLogRelReturn((uint64_t)cMaxChunks * cbChunk == cbMax, VERR_INTERNAL_ERROR_3);
1507
1508 /*
1509     * Allocate and initialize the allocator instance.
1510 */
1511 size_t cbNeeded = RT_UOFFSETOF_DYN(IEMEXECMEMALLOCATOR, aChunks[cMaxChunks]);
1512#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1513 size_t const offBitmaps = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1514 size_t const cbBitmap = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3);
1515 cbNeeded += cbBitmap * cMaxChunks;
1516 AssertCompile(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT <= 10);
1517 Assert(cbChunk > RT_BIT_32(IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 3));
1518#endif
1519#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1520 size_t const offEhFrames = RT_ALIGN_Z(cbNeeded, RT_CACHELINE_SIZE);
1521 cbNeeded += sizeof(IEMEXECMEMCHUNKEHFRAME) * cMaxChunks;
1522#endif
1523 PIEMEXECMEMALLOCATOR pExecMemAllocator = (PIEMEXECMEMALLOCATOR)RTMemAllocZ(cbNeeded);
1524 AssertLogRelMsgReturn(pExecMemAllocator, ("cbNeeded=%zx cMaxChunks=%#x cbChunk=%#x\n", cbNeeded, cMaxChunks, cbChunk),
1525 VERR_NO_MEMORY);
1526 pExecMemAllocator->uMagic = IEMEXECMEMALLOCATOR_MAGIC;
1527 pExecMemAllocator->cbChunk = cbChunk;
1528 pExecMemAllocator->cMaxChunks = cMaxChunks;
1529 pExecMemAllocator->cChunks = 0;
1530 pExecMemAllocator->idxChunkHint = 0;
1531 pExecMemAllocator->cAllocations = 0;
1532 pExecMemAllocator->cbTotal = 0;
1533 pExecMemAllocator->cbFree = 0;
1534 pExecMemAllocator->cbAllocated = 0;
1535#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1536 pExecMemAllocator->pbmAlloc = (uint64_t *)((uintptr_t)pExecMemAllocator + offBitmaps);
1537 pExecMemAllocator->cUnitsPerChunk = cbChunk >> IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT;
1538 pExecMemAllocator->cBitmapElementsPerChunk = cbChunk >> (IEMEXECMEM_ALT_SUB_ALLOC_UNIT_SHIFT + 6);
1539 memset(pExecMemAllocator->pbmAlloc, 0xff, cbBitmap); /* Mark everything as allocated. Clear when chunks are added. */
1540#endif
1541#if defined(IN_RING3) && !defined(RT_OS_WINDOWS)
1542 pExecMemAllocator->paEhFrames = (PIEMEXECMEMCHUNKEHFRAME)((uintptr_t)pExecMemAllocator + offEhFrames);
1543#endif
1544 for (uint32_t i = 0; i < cMaxChunks; i++)
1545 {
1546#ifdef IEMEXECMEM_USE_ALT_SUB_ALLOCATOR
1547 pExecMemAllocator->aChunks[i].cFreeUnits = 0;
1548 pExecMemAllocator->aChunks[i].idxFreeHint = 0;
1549#else
1550 pExecMemAllocator->aChunks[i].hHeap = NIL_RTHEAPSIMPLE;
1551#endif
1552 pExecMemAllocator->aChunks[i].pvChunk = NULL;
1553#ifdef IN_RING0
1554 pExecMemAllocator->aChunks[i].hMemObj = NIL_RTR0MEMOBJ;
1555#else
1556 pExecMemAllocator->aChunks[i].pvUnwindInfo = NULL;
1557#endif
1558 }
1559 pVCpu->iem.s.pExecMemAllocatorR3 = pExecMemAllocator;
1560
1561 /*
1562 * Do the initial allocations.
1563 */
1564    while (cbInitial > (uint64_t)pExecMemAllocator->cChunks * pExecMemAllocator->cbChunk)
1565 {
1566 int rc = iemExecMemAllocatorGrow(pVCpu, pExecMemAllocator);
1567 AssertLogRelRCReturn(rc, rc);
1568 }
1569
1570 pExecMemAllocator->idxChunkHint = 0;
1571
1572 return VINF_SUCCESS;
1573}
1574
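/*
 * Illustrative sketch only (not part of the build): how a caller might size the allocator
 * and how the chunk size defaulting above then plays out.  The function name and the sizes
 * below are made-up example values, not recommendations.
 */
#if 0
static int iemExecMemAllocatorInitExample(PVMCPU pVCpu)
{
    /* cbMax=64M, cbInitial=4M, cbChunk=0 (use the default): since 16M <= 64M < 256M the
       default chunk size becomes 64M / 4 = 16M, already a power of two, so cbMax stays 64M
       and cMaxChunks ends up as 4; the initial allocation loop then adds one 16M chunk up
       front to cover the 4M initial request. */
    return iemExecMemAllocatorInit(pVCpu, _64M, _4M, 0 /*cbChunk*/);
}
#endif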
1575
1576/*********************************************************************************************************************************
1577* Native Recompilation *
1578*********************************************************************************************************************************/
1579
1580
1581/**
1582 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
1583 */
1584IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
1585{
1586 pVCpu->iem.s.cInstructions += idxInstr;
1587 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
1588}
1589
1590
1591/**
1592 * Used by TB code when it wants to raise a \#GP(0).
1593 */
1594IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
1595{
1596 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
1597#ifndef _MSC_VER
1598 return VINF_IEM_RAISED_XCPT; /* not reached */
1599#endif
1600}
1601
1602
1603/**
1604 * Used by TB code when it wants to raise a \#NM.
1605 */
1606IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
1607{
1608 iemRaiseDeviceNotAvailableJmp(pVCpu);
1609#ifndef _MSC_VER
1610 return VINF_IEM_RAISED_XCPT; /* not reached */
1611#endif
1612}
1613
1614
1615/**
1616 * Used by TB code when it wants to raise a \#UD.
1617 */
1618IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
1619{
1620 iemRaiseUndefinedOpcodeJmp(pVCpu);
1621#ifndef _MSC_VER
1622 return VINF_IEM_RAISED_XCPT; /* not reached */
1623#endif
1624}
1625
1626
1627/**
1628 * Used by TB code when it wants to raise a \#MF.
1629 */
1630IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
1631{
1632 iemRaiseMathFaultJmp(pVCpu);
1633#ifndef _MSC_VER
1634 return VINF_IEM_RAISED_XCPT; /* not reached */
1635#endif
1636}
1637
1638
1639/**
1640 * Used by TB code when it wants to raise a \#XF.
1641 */
1642IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
1643{
1644 iemRaiseSimdFpExceptionJmp(pVCpu);
1645#ifndef _MSC_VER
1646 return VINF_IEM_RAISED_XCPT; /* not reached */
1647#endif
1648}
1649
1650
1651/**
1652 * Used by TB code when detecting opcode changes.
1653 * @see iemThreadedFuncWorkerObsoleteTb
1654 */
1655IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
1656{
1657    /* We set fSafeToFree to false because we're being called in the context
1658       of a TB callback function, which for native TBs means we cannot release
1659       the executable memory until we've returned our way back to iemTbExec, as
1660       that return path goes via the native code generated for the TB. */
1661 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1662 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
1663 return VINF_IEM_REEXEC_BREAK;
1664}
1665
1666
1667/**
1668 * Used by TB code when we need to switch to a TB with CS.LIM checking.
1669 */
1670IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
1671{
1672 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
1673 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1674 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
1675 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
1676 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
1677 return VINF_IEM_REEXEC_BREAK;
1678}
1679
1680
1681/**
1682 * Used by TB code when we missed a PC check after a branch.
1683 */
1684IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
1685{
1686 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
1687 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
1688 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
1689 pVCpu->iem.s.pbInstrBuf));
1690 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
1691 return VINF_IEM_REEXEC_BREAK;
1692}
1693
1694
1695
1696/*********************************************************************************************************************************
1697* Helpers: Segmented memory fetches and stores. *
1698*********************************************************************************************************************************/
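/*
 * Illustrative note: each helper below just forwards to the matching iemMem*Jmp worker, or to its
 * *SafeJmp sibling when the recompiled code does the TLB lookup inline and only falls back here,
 * and any fault is raised via longjmp rather than a status code.  A call from C would look roughly
 * like the sketch below; the wrapper name is made up and X86_SREG_DS is just an arbitrary segment.
 */
#if 0
static uint64_t iemNativeHlpFetchExample(PVMCPUCC pVCpu, RTGCPTR GCPtrMem)
{
    return iemNativeHlpMemFetchDataU8(pVCpu, GCPtrMem, X86_SREG_DS); /* zero extended to 64 bits */
}
#endif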
1699
1700/**
1701 * Used by TB code to load unsigned 8-bit data w/ segmentation.
1702 */
1703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1706 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1707#else
1708 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/**
1714 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1715 * to 16 bits.
1716 */
1717IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1718{
1719#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1720 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1721#else
1722 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1723#endif
1724}
1725
1726
1727/**
1728 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1729 * to 32 bits.
1730 */
1731IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1732{
1733#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1734 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1735#else
1736 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1737#endif
1738}
1739


1740/**
1741 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
1742 * to 64 bits.
1743 */
1744IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1745{
1746#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1747 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
1748#else
1749 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
1750#endif
1751}
1752
1753
1754/**
1755 * Used by TB code to load unsigned 16-bit data w/ segmentation.
1756 */
1757IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1758{
1759#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1760 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1761#else
1762 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1763#endif
1764}
1765
1766
1767/**
1768 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1769 * to 32 bits.
1770 */
1771IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1772{
1773#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1774 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1775#else
1776 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1777#endif
1778}
1779
1780
1781/**
1782 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
1783 * to 64 bits.
1784 */
1785IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1786{
1787#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1788 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
1789#else
1790 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
1791#endif
1792}
1793
1794
1795/**
1796 * Used by TB code to load unsigned 32-bit data w/ segmentation.
1797 */
1798IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1799{
1800#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1801 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1802#else
1803 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1804#endif
1805}
1806
1807
1808/**
1809 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
1810 * to 64 bits.
1811 */
1812IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1813{
1814#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1815 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
1816#else
1817 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
1818#endif
1819}
1820
1821
1822/**
1823 * Used by TB code to load unsigned 64-bit data w/ segmentation.
1824 */
1825IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
1826{
1827#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1828 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
1829#else
1830 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
1831#endif
1832}
1833
1834
1835/**
1836 * Used by TB code to store unsigned 8-bit data w/ segmentation.
1837 */
1838IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
1839{
1840#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1841 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1842#else
1843 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
1844#endif
1845}
1846
1847
1848/**
1849 * Used by TB code to store unsigned 16-bit data w/ segmentation.
1850 */
1851IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
1852{
1853#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1854 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1855#else
1856 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
1857#endif
1858}
1859
1860
1861/**
1862 * Used by TB code to store unsigned 32-bit data w/ segmentation.
1863 */
1864IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
1865{
1866#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1867 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1868#else
1869 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
1870#endif
1871}
1872
1873
1874/**
1875 * Used by TB code to store unsigned 64-bit data w/ segmentation.
1876 */
1877IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
1878{
1879#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1880 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1881#else
1882 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
1883#endif
1884}
1885
1886
1887
1888/**
1889 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
1890 */
1891IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1892{
1893#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1894 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1895#else
1896 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1897#endif
1898}
1899
1900
1901/**
1902 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
1903 */
1904IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1905{
1906#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1907 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1908#else
1909 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1910#endif
1911}
1912
1913
1914/**
1915 * Used by TB code to store a 32-bit selector value onto a generic stack.
1916 *
1917 * Intel CPUs don't write the whole dword in this case, thus the special function.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1922 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1923#else
1924 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1925#endif
1926}
1927
1928
1929/**
1930 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1935 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1936#else
1937 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1948 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1949#else
1950 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
1957 */
1958IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1959{
1960#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1961 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1962#else
1963 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
1964#endif
1965}
1966
1967
1968/**
1969 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
1970 */
1971IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1972{
1973#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1974 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1975#else
1976 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
1977#endif
1978}
1979
1980
1981
1982/*********************************************************************************************************************************
1983* Helpers: Flat memory fetches and stores. *
1984*********************************************************************************************************************************/
1985
1986/**
1987 * Used by TB code to load unsigned 8-bit data w/ flat address.
1988 * @note Zero extending the value to 64-bit to simplify assembly.
1989 */
1990IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1991{
1992#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1993 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1994#else
1995 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1996#endif
1997}
1998
1999
2000/**
2001 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2002 * to 16 bits.
2003 * @note Zero extending the value to 64-bit to simplify assembly.
2004 */
2005IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2006{
2007#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2009#else
2010 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2011#endif
2012}
2013
2014
2015/**
2016 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2017 * to 32 bits.
2018 * @note Zero extending the value to 64-bit to simplify assembly.
2019 */
2020IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2021{
2022#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2024#else
2025 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2026#endif
2027}
2028
2029
2030/**
2031 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
2032 * to 64 bits.
2033 */
2034IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2035{
2036#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2037 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2038#else
2039 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
2040#endif
2041}
2042
2043
2044/**
2045 * Used by TB code to load unsigned 16-bit data w/ flat address.
2046 * @note Zero extending the value to 64-bit to simplify assembly.
2047 */
2048IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2049{
2050#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2051 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2052#else
2053 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2054#endif
2055}
2056
2057
2058/**
2059 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2060 * to 32 bits.
2061 * @note Zero extending the value to 64-bit to simplify assembly.
2062 */
2063IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2064{
2065#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2067#else
2068 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2069#endif
2070}
2071
2072
2073/**
2074 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
2075 * to 64 bits.
2076 * @note Zero extending the value to 64-bit to simplify assembly.
2077 */
2078IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2079{
2080#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2081 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2082#else
2083 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
2084#endif
2085}
2086
2087
2088/**
2089 * Used by TB code to load unsigned 32-bit data w/ flat address.
2090 * @note Zero extending the value to 64-bit to simplify assembly.
2091 */
2092IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2093{
2094#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2095 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2096#else
2097 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2098#endif
2099}
2100
2101
2102/**
2103 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
2104 * to 64 bits.
2105 * @note Zero extending the value to 64-bit to simplify assembly.
2106 */
2107IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2108{
2109#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2110 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2111#else
2112 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
2113#endif
2114}
2115
2116
2117/**
2118 * Used by TB code to load unsigned 64-bit data w/ flat address.
2119 */
2120IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2121{
2122#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
2123 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
2124#else
2125 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
2126#endif
2127}
2128
2129
2130/**
2131 * Used by TB code to store unsigned 8-bit data w/ flat address.
2132 */
2133IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
2134{
2135#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2136 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
2137#else
2138 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
2139#endif
2140}
2141
2142
2143/**
2144 * Used by TB code to store unsigned 16-bit data w/ flat address.
2145 */
2146IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2147{
2148#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2149 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
2150#else
2151 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
2152#endif
2153}
2154
2155
2156/**
2157 * Used by TB code to store unsigned 32-bit data w/ flat address.
2158 */
2159IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2160{
2161#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2162 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
2163#else
2164 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
2165#endif
2166}
2167
2168
2169/**
2170 * Used by TB code to store unsigned 64-bit data w/ flat address.
2171 */
2172IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2173{
2174#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
2175 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
2176#else
2177 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
2178#endif
2179}
2180
2181
2182
2183/**
2184 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
2185 */
2186IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
2187{
2188#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2189 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
2190#else
2191 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
2192#endif
2193}
2194
2195
2196/**
2197 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
2198 */
2199IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2200{
2201#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2202 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
2203#else
2204 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
2205#endif
2206}
2207
2208
2209/**
2210 * Used by TB code to store a segment selector value onto a flat stack.
2211 *
2212 * Intel CPUs don't write the whole dword in this case, thus the special function.
2213 */
2214IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
2215{
2216#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2217 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
2218#else
2219 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
2220#endif
2221}
2222
2223
2224/**
2225 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
2226 */
2227IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
2228{
2229#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
2230 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
2231#else
2232 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
2233#endif
2234}
2235
2236
2237/**
2238 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
2239 */
2240IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2241{
2242#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2243 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
2244#else
2245 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
2246#endif
2247}
2248
2249
2250/**
2251 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
2252 */
2253IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2254{
2255#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2256 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
2257#else
2258 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
2259#endif
2260}
2261
2262
2263/**
2264 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
2265 */
2266IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
2267{
2268#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
2269 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
2270#else
2271 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
2272#endif
2273}
2274
2275
2276
2277/*********************************************************************************************************************************
2278* Helpers: Segmented memory mapping. *
2279*********************************************************************************************************************************/
2280
2281/**
2282 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
2283 * segmentation.
2284 */
2285IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2286 RTGCPTR GCPtrMem, uint8_t iSegReg))
2287{
2288#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2289 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2290#else
2291 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2292#endif
2293}
2294
2295
2296/**
2297 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
2298 */
2299IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2300 RTGCPTR GCPtrMem, uint8_t iSegReg))
2301{
2302#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2303 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2304#else
2305 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2306#endif
2307}
2308
2309
2310/**
2311 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
2312 */
2313IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2314 RTGCPTR GCPtrMem, uint8_t iSegReg))
2315{
2316#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2317 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2318#else
2319 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2320#endif
2321}
2322
2323
2324/**
2325 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
2326 */
2327IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2328 RTGCPTR GCPtrMem, uint8_t iSegReg))
2329{
2330#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2331 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2332#else
2333 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2334#endif
2335}
2336
2337
2338/**
2339 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
2340 * segmentation.
2341 */
2342IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2343 RTGCPTR GCPtrMem, uint8_t iSegReg))
2344{
2345#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2346 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2347#else
2348 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2349#endif
2350}
2351
2352
2353/**
2354 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
2355 */
2356IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2357 RTGCPTR GCPtrMem, uint8_t iSegReg))
2358{
2359#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2360 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2361#else
2362 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2363#endif
2364}
2365
2366
2367/**
2368 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
2369 */
2370IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2371 RTGCPTR GCPtrMem, uint8_t iSegReg))
2372{
2373#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2374 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2375#else
2376 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2377#endif
2378}
2379
2380
2381/**
2382 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
2383 */
2384IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2385 RTGCPTR GCPtrMem, uint8_t iSegReg))
2386{
2387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2388 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2389#else
2390 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2391#endif
2392}
2393
2394
2395/**
2396 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
2397 * segmentation.
2398 */
2399IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2400 RTGCPTR GCPtrMem, uint8_t iSegReg))
2401{
2402#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2403 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2404#else
2405 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2406#endif
2407}
2408
2409
2410/**
2411 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
2412 */
2413IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2414 RTGCPTR GCPtrMem, uint8_t iSegReg))
2415{
2416#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2417 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2418#else
2419 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2420#endif
2421}
2422
2423
2424/**
2425 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
2426 */
2427IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2428 RTGCPTR GCPtrMem, uint8_t iSegReg))
2429{
2430#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2431 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2432#else
2433 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2434#endif
2435}
2436
2437
2438/**
2439 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
2440 */
2441IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2442 RTGCPTR GCPtrMem, uint8_t iSegReg))
2443{
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2445 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2446#else
2447 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2448#endif
2449}
2450
2451
2452/**
2453 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
2454 * segmentation.
2455 */
2456IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2457 RTGCPTR GCPtrMem, uint8_t iSegReg))
2458{
2459#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2460 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2461#else
2462 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2463#endif
2464}
2465
2466
2467/**
2468 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
2469 */
2470IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2471 RTGCPTR GCPtrMem, uint8_t iSegReg))
2472{
2473#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2474 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2475#else
2476 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2477#endif
2478}
2479
2480
2481/**
2482 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
2483 */
2484IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2485 RTGCPTR GCPtrMem, uint8_t iSegReg))
2486{
2487#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2488 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2489#else
2490 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2491#endif
2492}
2493
2494
2495/**
2496 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
2497 */
2498IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2499 RTGCPTR GCPtrMem, uint8_t iSegReg))
2500{
2501#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2502 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2503#else
2504 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2505#endif
2506}
2507
2508
2509/**
2510 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
2511 */
2512IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2513 RTGCPTR GCPtrMem, uint8_t iSegReg))
2514{
2515#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2516 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2517#else
2518 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2519#endif
2520}
2521
2522
2523/**
2524 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
2525 */
2526IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2527 RTGCPTR GCPtrMem, uint8_t iSegReg))
2528{
2529#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2530 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2531#else
2532 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2533#endif
2534}
2535
2536
2537/**
2538 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
2539 * segmentation.
2540 */
2541IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2542 RTGCPTR GCPtrMem, uint8_t iSegReg))
2543{
2544#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2545 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2546#else
2547 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2548#endif
2549}
2550
2551
2552/**
2553 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
2554 */
2555IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2556 RTGCPTR GCPtrMem, uint8_t iSegReg))
2557{
2558#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2559 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2560#else
2561 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2562#endif
2563}
2564
2565
2566/**
2567 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
2568 */
2569IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2570 RTGCPTR GCPtrMem, uint8_t iSegReg))
2571{
2572#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2573 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2574#else
2575 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2576#endif
2577}
2578
2579
2580/**
2581 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
2582 */
2583IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
2584 RTGCPTR GCPtrMem, uint8_t iSegReg))
2585{
2586#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2587 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2588#else
2589 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
2590#endif
2591}
2592
2593
2594/*********************************************************************************************************************************
2595* Helpers: Flat memory mapping. *
2596*********************************************************************************************************************************/
2597
2598/**
2599 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
2600 * address.
2601 */
2602IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2603{
2604#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2605 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2606#else
2607 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2608#endif
2609}
2610
2611
2612/**
2613 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
2614 */
2615IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2616{
2617#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2618 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2619#else
2620 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2621#endif
2622}
2623
2624
2625/**
2626 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
2627 */
2628IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2629{
2630#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2631 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2632#else
2633 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2634#endif
2635}
2636
2637
2638/**
2639 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
2640 */
2641IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2642{
2643#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2644 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2645#else
2646 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2647#endif
2648}
2649
2650
2651/**
2652 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
2653 * address.
2654 */
2655IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2656{
2657#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2658 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2659#else
2660 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2661#endif
2662}
2663
2664
2665/**
2666 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
2667 */
2668IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2669{
2670#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2671 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2672#else
2673 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2674#endif
2675}
2676
2677
2678/**
2679 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
2680 */
2681IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2682{
2683#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2684 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2685#else
2686 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2687#endif
2688}
2689
2690
2691/**
2692 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
2693 */
2694IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2695{
2696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2697 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2698#else
2699 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2700#endif
2701}
2702
2703
2704/**
2705 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
2706 * address.
2707 */
2708IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2709{
2710#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2711 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2712#else
2713 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2714#endif
2715}
2716
2717
2718/**
2719 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
2720 */
2721IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2722{
2723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2724 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2725#else
2726 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2727#endif
2728}
2729
2730
2731/**
2732 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
2733 */
2734IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2735{
2736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2737 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2738#else
2739 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2740#endif
2741}
2742
2743
2744/**
2745 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
2746 */
2747IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2748{
2749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2750 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2751#else
2752 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2753#endif
2754}
2755
2756
2757/**
2758 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
2759 * address.
2760 */
2761IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2762{
2763#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2764 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2765#else
2766 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2767#endif
2768}
2769
2770
2771/**
2772 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
2773 */
2774IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2775{
2776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2777 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2778#else
2779 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2780#endif
2781}
2782
2783
2784/**
2785 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
2786 */
2787IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2788{
2789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2790 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2791#else
2792 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2793#endif
2794}
2795
2796
2797/**
2798 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
2799 */
2800IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2801{
2802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2803 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2804#else
2805 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2806#endif
2807}
2808
2809
2810/**
2811 * Used by TB code to map 80-bit float data writeonly w/ flat address.
2812 */
2813IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2814{
2815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2816 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2817#else
2818 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2819#endif
2820}
2821
2822
2823/**
2824 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
2825 */
2826IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2827{
2828#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2829 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2830#else
2831 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2832#endif
2833}
2834
2835
2836/**
2837 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
2838 * address.
2839 */
2840IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2841{
2842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2843 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2844#else
2845 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2846#endif
2847}
2848
2849
2850/**
2851 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
2852 */
2853IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2854{
2855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2856 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2857#else
2858 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2859#endif
2860}
2861
2862
2863/**
2864 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
2865 */
2866IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2867{
2868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2869 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2870#else
2871 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2872#endif
2873}
2874
2875
2876/**
2877 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
2878 */
2879IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
2880{
2881#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2882 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2883#else
2884 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2885#endif
2886}
2887
2888
2889/*********************************************************************************************************************************
2890* Helpers: Commit, rollback & unmap *
2891*********************************************************************************************************************************/
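/*
 * Note: the bUnmapInfo byte passed to these helpers is the value that the corresponding
 *       iemNativeHlpMem*Map* helper above returned via its pbUnmapInfo parameter; it
 *       identifies which mapping to commit and release.
 */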
2892
2893/**
2894 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2895 */
2896IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2897{
2898 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2899}
2900
2901
2902/**
2903 * Used by TB code to commit and unmap a read-write memory mapping.
2904 */
2905IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2906{
2907 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2908}
2909
2910
2911/**
2912 * Used by TB code to commit and unmap a write-only memory mapping.
2913 */
2914IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2915{
2916 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2917}
2918
2919
2920/**
2921 * Used by TB code to commit and unmap a read-only memory mapping.
2922 */
2923IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2924{
2925 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2926}
2927
2928
2929/**
2930 * Reinitializes the native recompiler state.
2931 *
2932 * Called before starting a new recompile job.
2933 */
2934static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2935{
2936 pReNative->cLabels = 0;
2937 pReNative->bmLabelTypes = 0;
2938 pReNative->cFixups = 0;
2939#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2940 pReNative->pDbgInfo->cEntries = 0;
2941#endif
2942 pReNative->pTbOrg = pTb;
2943 pReNative->cCondDepth = 0;
2944 pReNative->uCondSeqNo = 0;
2945 pReNative->uCheckIrqSeqNo = 0;
2946 pReNative->uTlbSeqNo = 0;
2947
2948#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2949 pReNative->Core.offPc = 0;
2950 pReNative->Core.cInstrPcUpdateSkipped = 0;
2951#endif
2952 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2953#if IEMNATIVE_HST_GREG_COUNT < 32
2954 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2955#endif
2956 ;
2957 pReNative->Core.bmHstRegsWithGstShadow = 0;
2958 pReNative->Core.bmGstRegShadows = 0;
2959 pReNative->Core.bmVars = 0;
2960 pReNative->Core.bmStack = 0;
2961 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2962 pReNative->Core.u64ArgVars = UINT64_MAX;
2963
2964 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 13);
2965 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2966 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2967 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2968 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2969 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2970 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2971 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2972 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2973 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2974 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2975 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2976 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2977 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2978
2979 /* Full host register reinit: */
2980 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2981 {
2982 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2983 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2984 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2985 }
2986
2987 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2988 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2989#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2990 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2991#endif
2992#ifdef IEMNATIVE_REG_FIXED_TMP0
2993 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2994#endif
2995#ifdef IEMNATIVE_REG_FIXED_TMP1
2996 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2997#endif
2998#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2999 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
3000#endif
3001 );
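    /* Mark all remaining fixed registers (those without a dedicated role assigned below) as reserved. */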
3002 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3003 {
3004 fRegs &= ~RT_BIT_32(idxReg);
3005        pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3006 }
3007
3008 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
3009#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
3010 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
3011#endif
3012#ifdef IEMNATIVE_REG_FIXED_TMP0
3013 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3014#endif
3015#ifdef IEMNATIVE_REG_FIXED_TMP1
3016 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
3017#endif
3018#ifdef IEMNATIVE_REG_FIXED_PC_DBG
3019 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
3020#endif
3021
3022#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3023# ifdef RT_ARCH_ARM64
3024    /*
3025     * Arm64 only has 32 128-bit registers, so in order to support emulating 256-bit registers we statically
3026     * pair two real registers into one virtual register for now, leaving us with 16 256-bit registers.
3027     * We always pair v0 with v1, v2 with v3, etc., so we mark the higher (odd) register of each pair as fixed
3028     * here during init, and the register allocator assumes it is always free when the lower one is picked.
3029     */
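    /* Every odd bit is set in the mask below, i.e. it marks v1, v3, v5, ..., the upper register of each pair, as fixed. */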
3030 uint32_t const fFixedAdditional = UINT32_C(0xaaaaaaaa);
3031# else
3032 uint32_t const fFixedAdditional = 0;
3033# endif
3034
3035 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
3036 | fFixedAdditional
3037# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
3038 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
3039# endif
3040 ;
3041 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
3042 pReNative->Core.bmGstSimdRegShadows = 0;
3043 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
3044 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
3045
3046 /* Full host register reinit: */
3047 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
3048 {
3049 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
3050 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
3051 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
3052 }
3053
3054 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK | fFixedAdditional;
3055 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
3056 {
3057 fRegs &= ~RT_BIT_32(idxReg);
3058 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
3059 }
3060
3061#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
3062 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
3063#endif
3064
3065#endif
3066
3067 return pReNative;
3068}
3069
3070
3071/**
3072 * Allocates and initializes the native recompiler state.
3073 *
3074 * This is called the first time an EMT wants to recompile something.
3075 *
3076 * @returns Pointer to the new recompiler state.
3077 * @param pVCpu The cross context virtual CPU structure of the calling
3078 * thread.
3079 * @param pTb The TB that's about to be recompiled.
3080 * @thread EMT(pVCpu)
3081 */
3082static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
3083{
3084 VMCPU_ASSERT_EMT(pVCpu);
3085
3086 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
3087 AssertReturn(pReNative, NULL);
3088
3089 /*
3090 * Try allocate all the buffers and stuff we need.
3091 */
3092 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
3093 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K);
3094 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K);
3095#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3096 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K]));
3097#endif
3098 if (RT_LIKELY( pReNative->pInstrBuf
3099 && pReNative->paLabels
3100 && pReNative->paFixups)
3101#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3102 && pReNative->pDbgInfo
3103#endif
3104 )
3105 {
3106 /*
3107 * Set the buffer & array sizes on success.
3108 */
3109 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
3110 pReNative->cLabelsAlloc = _8K;
3111 pReNative->cFixupsAlloc = _16K;
3112#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3113 pReNative->cDbgInfoAlloc = _16K;
3114#endif
3115
3116 /* Other constant stuff: */
3117 pReNative->pVCpu = pVCpu;
3118
3119 /*
3120 * Done, just need to save it and reinit it.
3121 */
3122 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative;
3123 return iemNativeReInit(pReNative, pTb);
3124 }
3125
3126 /*
3127 * Failed. Cleanup and return.
3128 */
3129 AssertFailed();
3130 RTMemFree(pReNative->pInstrBuf);
3131 RTMemFree(pReNative->paLabels);
3132 RTMemFree(pReNative->paFixups);
3133#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3134 RTMemFree(pReNative->pDbgInfo);
3135#endif
3136 RTMemFree(pReNative);
3137 return NULL;
3138}
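/*
 * A minimal usage sketch (not from the surrounding code; the caller-side shape is an
 * assumption based on the pNativeRecompilerStateR3 handling above): the state is created
 * lazily the first time an EMT recompiles something and merely re-initialized afterwards.
 */
#if 0 /* illustrative only */
PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
if (pReNative)
    pReNative = iemNativeReInit(pReNative, pTb);
else
{
    pReNative = iemNativeInit(pVCpu, pTb); /* allocates the buffers and stores itself in pVCpu */
    AssertReturn(pReNative, NULL);
}
#endif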
3139
3140
3141/**
3142 * Creates a label
3143 *
3144 * If the label does not yet have a defined position,
3145 * call iemNativeLabelDefine() later to set it.
3146 *
3147 * @returns Label ID. Throws VBox status code on failure, so no need to check
3148 * the return value.
3149 * @param pReNative The native recompile state.
3150 * @param enmType The label type.
3151 * @param offWhere The instruction offset of the label. UINT32_MAX if the
3152 * label is not yet defined (default).
3153  * @param   uData       Data associated with the label.  Only applicable to
3154  *                      certain types of labels.  Default is zero.
3155 */
3156DECL_HIDDEN_THROW(uint32_t)
3157iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3158 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
3159{
3160 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
3161
3162 /*
3163 * Locate existing label definition.
3164 *
3165 * This is only allowed for forward declarations where offWhere=UINT32_MAX
3166 * and uData is zero.
3167 */
3168 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3169 uint32_t const cLabels = pReNative->cLabels;
3170 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
3171#ifndef VBOX_STRICT
3172 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
3173 && offWhere == UINT32_MAX
3174 && uData == 0
3175#endif
3176 )
3177 {
3178#ifndef VBOX_STRICT
3179 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
3180 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3181 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
3182 if (idxLabel < pReNative->cLabels)
3183 return idxLabel;
3184#else
3185 for (uint32_t i = 0; i < cLabels; i++)
3186 if ( paLabels[i].enmType == enmType
3187 && paLabels[i].uData == uData)
3188 {
3189 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3190 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3191 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
3192 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
3193 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3194 return i;
3195 }
3196 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
3197 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
3198#endif
3199 }
3200
3201 /*
3202 * Make sure we've got room for another label.
3203 */
3204 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
3205 { /* likely */ }
3206 else
3207 {
3208 uint32_t cNew = pReNative->cLabelsAlloc;
3209 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3210 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
3211 cNew *= 2;
3212        AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
3213 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
3214 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
3215 pReNative->paLabels = paLabels;
3216 pReNative->cLabelsAlloc = cNew;
3217 }
3218
3219 /*
3220 * Define a new label.
3221 */
3222 paLabels[cLabels].off = offWhere;
3223 paLabels[cLabels].enmType = enmType;
3224 paLabels[cLabels].uData = uData;
3225 pReNative->cLabels = cLabels + 1;
3226
3227 Assert((unsigned)enmType < 64);
3228 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
3229
3230 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3231 {
3232 Assert(uData == 0);
3233 pReNative->aidxUniqueLabels[enmType] = cLabels;
3234 }
3235
3236 if (offWhere != UINT32_MAX)
3237 {
3238#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3239 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3240 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
3241#endif
3242 }
3243 return cLabels;
3244}
3245
3246
3247/**
3248 * Defines the location of an existing label.
3249 *
3250 * @param pReNative The native recompile state.
3251 * @param idxLabel The label to define.
3252 * @param offWhere The position.
3253 */
3254DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
3255{
3256 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
3257 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
3258 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
3259 pLabel->off = offWhere;
3260#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3261 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
3262 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
3263#endif
3264}
3265
3266
3267/**
3268 * Looks up a label.
3269 *
3270 * @returns Label ID if found, UINT32_MAX if not.
3271 */
3272static uint32_t iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
3273 uint32_t offWhere = UINT32_MAX, uint16_t uData = 0) RT_NOEXCEPT
3274{
3275 Assert((unsigned)enmType < 64);
3276 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
3277 {
3278 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
3279 return pReNative->aidxUniqueLabels[enmType];
3280
3281 PIEMNATIVELABEL paLabels = pReNative->paLabels;
3282 uint32_t const cLabels = pReNative->cLabels;
3283 for (uint32_t i = 0; i < cLabels; i++)
3284 if ( paLabels[i].enmType == enmType
3285 && paLabels[i].uData == uData
3286 && ( paLabels[i].off == offWhere
3287 || offWhere == UINT32_MAX
3288 || paLabels[i].off == UINT32_MAX))
3289 return i;
3290 }
3291 return UINT32_MAX;
3292}
3293
3294
3295/**
3296 * Adds a fixup.
3297 *
3298 * @throws VBox status code (int) on failure.
3299 * @param pReNative The native recompile state.
3300 * @param offWhere The instruction offset of the fixup location.
3301 * @param idxLabel The target label ID for the fixup.
3302 * @param enmType The fixup type.
3303 * @param offAddend Fixup addend if applicable to the type. Default is 0.
3304 */
3305DECL_HIDDEN_THROW(void)
3306iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
3307 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
3308{
3309 Assert(idxLabel <= UINT16_MAX);
3310 Assert((unsigned)enmType <= UINT8_MAX);
3311
3312 /*
3313 * Make sure we've room.
3314 */
3315 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
3316 uint32_t const cFixups = pReNative->cFixups;
3317 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
3318 { /* likely */ }
3319 else
3320 {
3321 uint32_t cNew = pReNative->cFixupsAlloc;
3322 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3323 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
3324 cNew *= 2;
3325 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
3326 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
3327 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
3328 pReNative->paFixups = paFixups;
3329 pReNative->cFixupsAlloc = cNew;
3330 }
3331
3332 /*
3333 * Add the fixup.
3334 */
3335 paFixups[cFixups].off = offWhere;
3336 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
3337 paFixups[cFixups].enmType = enmType;
3338 paFixups[cFixups].offAddend = offAddend;
3339 pReNative->cFixups = cFixups + 1;
3340}
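/*
 * A minimal sketch of how labels and fixups are meant to be combined (the label and fixup
 * type names below are assumptions, not taken from the surrounding code): create the label
 * as a forward declaration, record a fixup at each branch that targets it, and define its
 * position once the target code has been emitted.
 */
#if 0 /* illustrative only */
uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Example /* assumed */, UINT32_MAX, 0);
/* ... emit a branch instruction at offset off and register it for patching ... */
iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Example /* assumed */, 0);
/* ... later, once the target position is known ... */
iemNativeLabelDefine(pReNative, idxLabel, off);
#endif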
3341
3342
3343/**
3344 * Slow code path for iemNativeInstrBufEnsure.
3345 */
3346DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
3347{
3348 /* Double the buffer size till we meet the request. */
3349 uint32_t cNew = pReNative->cInstrBufAlloc;
3350 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
3351 do
3352 cNew *= 2;
3353 while (cNew < off + cInstrReq);
3354
3355 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
3356#ifdef RT_ARCH_ARM64
3357 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
3358#else
3359 uint32_t const cbMaxInstrBuf = _2M;
3360#endif
3361 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
3362
3363 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
3364 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
3365
3366#ifdef VBOX_STRICT
3367 pReNative->offInstrBufChecked = off + cInstrReq;
3368#endif
3369 pReNative->cInstrBufAlloc = cNew;
3370 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
3371}
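/*
 * Worked example of the doubling above, assuming the 64KB initial allocation from
 * iemNativeInit and 4-byte IEMNATIVEINSTR entries (ARM64): cInstrBufAlloc starts at 16384
 * instructions; a request for off + cInstrReq = 40000 doubles it twice to 65536
 * instructions, i.e. 256KB, which is still below the 1MB ARM64 cap enforced above.
 */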
3372
3373#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3374
3375/**
3376 * Grows the static debug info array used during recompilation.
3377 *
3378 * @returns Pointer to the new debug info block; throws VBox status code on
3379 * failure, so no need to check the return value.
3380 */
3381DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3382{
3383 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
3384 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
3385 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
3386 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
3387 pReNative->pDbgInfo = pDbgInfo;
3388 pReNative->cDbgInfoAlloc = cNew;
3389 return pDbgInfo;
3390}
3391
3392
3393/**
3394 * Adds a new, uninitialized debug info entry, returning the pointer to it.
3395 */
3396DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
3397{
3398 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
3399 { /* likely */ }
3400 else
3401 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
3402 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
3403}
3404
3405
3406/**
3407 * Debug Info: Adds a native offset record, if necessary.
3408 */
3409static void iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3410{
3411 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
3412
3413 /*
3414 * Search backwards to see if we've got a similar record already.
3415 */
3416 uint32_t idx = pDbgInfo->cEntries;
3417 uint32_t idxStop = idx > 8 ? idx - 8 : 0;
3418 while (idx-- > idxStop)
3419 if (pDbgInfo->aEntries[idx].Gen.uType == kIemTbDbgEntryType_NativeOffset)
3420 {
3421 if (pDbgInfo->aEntries[idx].NativeOffset.offNative == off)
3422 return;
3423 AssertStmt(pDbgInfo->aEntries[idx].NativeOffset.offNative < off,
3424 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
3425 break;
3426 }
3427
3428 /*
3429 * Add it.
3430 */
3431 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
3432 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
3433 pEntry->NativeOffset.offNative = off;
3434}
3435
3436
3437/**
3438 * Debug Info: Record info about a label.
3439 */
3440static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
3441{
3442 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3443 pEntry->Label.uType = kIemTbDbgEntryType_Label;
3444 pEntry->Label.uUnused = 0;
3445 pEntry->Label.enmLabel = (uint8_t)enmType;
3446 pEntry->Label.uData = uData;
3447}
3448
3449
3450/**
3451 * Debug Info: Record info about a threaded call.
3452 */
3453static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
3454{
3455 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3456 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
3457 pEntry->ThreadedCall.fRecompiled = fRecompiled;
3458 pEntry->ThreadedCall.uUnused = 0;
3459 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
3460}
3461
3462
3463/**
3464 * Debug Info: Record info about a new guest instruction.
3465 */
3466static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
3467{
3468 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3469 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
3470 pEntry->GuestInstruction.uUnused = 0;
3471 pEntry->GuestInstruction.fExec = fExec;
3472}
3473
3474
3475/**
3476 * Debug Info: Record info about guest register shadowing.
3477 */
3478static void iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
3479 uint8_t idxHstReg = UINT8_MAX, uint8_t idxHstRegPrev = UINT8_MAX)
3480{
3481 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3482 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
3483 pEntry->GuestRegShadowing.uUnused = 0;
3484 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
3485 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
3486 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
3487}
3488
3489
3490# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3491/**
3492 * Debug Info: Record info about guest SIMD register shadowing.
3493 */
3494static void iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
3495 uint8_t idxHstSimdReg = UINT8_MAX, uint8_t idxHstSimdRegPrev = UINT8_MAX)
3496{
3497 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3498 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
3499 pEntry->GuestSimdRegShadowing.uUnused = 0;
3500 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
3501 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
3502 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
3503}
3504# endif
3505
3506
3507# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3508/**
3509 * Debug Info: Record info about delayed RIP updates.
3510 */
3511static void iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t offPc, uint32_t cInstrSkipped)
3512{
3513 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
3514 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
3515 pEntry->DelayedPcUpdate.offPc = offPc;
3516 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
3517}
3518# endif
3519
3520#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
3521
3522
3523/*********************************************************************************************************************************
3524* Register Allocator *
3525*********************************************************************************************************************************/
3526
3527/**
3528 * Register parameter indexes (indexed by argument number).
3529 */
3530DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
3531{
3532 IEMNATIVE_CALL_ARG0_GREG,
3533 IEMNATIVE_CALL_ARG1_GREG,
3534 IEMNATIVE_CALL_ARG2_GREG,
3535 IEMNATIVE_CALL_ARG3_GREG,
3536#if defined(IEMNATIVE_CALL_ARG4_GREG)
3537 IEMNATIVE_CALL_ARG4_GREG,
3538# if defined(IEMNATIVE_CALL_ARG5_GREG)
3539 IEMNATIVE_CALL_ARG5_GREG,
3540# if defined(IEMNATIVE_CALL_ARG6_GREG)
3541 IEMNATIVE_CALL_ARG6_GREG,
3542# if defined(IEMNATIVE_CALL_ARG7_GREG)
3543 IEMNATIVE_CALL_ARG7_GREG,
3544# endif
3545# endif
3546# endif
3547#endif
3548};
3549
3550/**
3551 * Call register masks indexed by argument count.
3552 */
3553DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
3554{
3555 0,
3556 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
3557 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
3558 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
3559 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3560 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
3561#if defined(IEMNATIVE_CALL_ARG4_GREG)
3562 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3563 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
3564# if defined(IEMNATIVE_CALL_ARG5_GREG)
3565 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3566 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
3567# if defined(IEMNATIVE_CALL_ARG6_GREG)
3568 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3569 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3570 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
3571# if defined(IEMNATIVE_CALL_ARG7_GREG)
3572 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
3573 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
3574 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
3575# endif
3576# endif
3577# endif
3578#endif
3579};
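/*
 * A small sketch of how the two tables above are intended to be consumed when setting up a
 * helper call (illustrative only; the surrounding call emitters are not shown here): for a
 * call taking three arguments, each argument register comes from g_aidxIemNativeCallRegs
 * and the combined mask of all three comes from g_afIemNativeCallRegs[3].
 */
#if 0 /* illustrative only */
uint8_t  const idxRegArg2 = g_aidxIemNativeCallRegs[2]; /* host register carrying the 3rd argument */
uint32_t const fArgRegs   = g_afIemNativeCallRegs[3];   /* mask of all registers used by a 3-argument call */
#endif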
3580
3581#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
3582/**
3583 * BP offset of the stack argument slots.
3584 *
3585 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
3586 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
3587 */
3588DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
3589{
3590 IEMNATIVE_FP_OFF_STACK_ARG0,
3591# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
3592 IEMNATIVE_FP_OFF_STACK_ARG1,
3593# endif
3594# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
3595 IEMNATIVE_FP_OFF_STACK_ARG2,
3596# endif
3597# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
3598 IEMNATIVE_FP_OFF_STACK_ARG3,
3599# endif
3600};
3601AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
3602#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
3603
3604/**
3605 * Info about shadowed guest register values.
3606 * @see IEMNATIVEGSTREG
3607 */
3608static struct
3609{
3610 /** Offset in VMCPU. */
3611 uint32_t off;
3612 /** The field size. */
3613 uint8_t cb;
3614 /** Name (for logging). */
3615 const char *pszName;
3616} const g_aGstShadowInfo[] =
3617{
3618#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
3619 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
3620 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
3621 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
3622 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
3623 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
3624 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
3625 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
3626 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
3627 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
3628 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
3629 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
3630 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
3631 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
3632 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
3633 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
3634 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
3635 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
3636 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
3637 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
3638 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
3639 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
3640 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
3641 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
3642 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
3643 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
3644 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
3645 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
3646 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
3647 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
3648 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
3649 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
3650 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
3651 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
3652 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
3653 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
3654 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
3655 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
3656 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
3657 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
3658 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
3659 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
3660 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
3661 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
3662 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
3663 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
3664 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
3665 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
3666 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
3667#undef CPUMCTX_OFF_AND_SIZE
3668};
3669AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
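/*
 * A minimal sketch of how a table entry is meant to be consumed (illustrative only, not one
 * of the real emitter paths): the off/cb pair locates the guest value inside VMCPU and
 * pszName is used for logging.
 */
#if 0 /* illustrative only */
uint32_t const offRax = g_aGstShadowInfo[kIemNativeGstReg_GprFirst + X86_GREG_xAX].off; /* byte offset of rax in VMCPU */
uint8_t  const cbRax  = g_aGstShadowInfo[kIemNativeGstReg_GprFirst + X86_GREG_xAX].cb;  /* 8 bytes for a GPR */
Log12(("shadowing guest %s: VMCPU+%#x, %u bytes\n",
       g_aGstShadowInfo[kIemNativeGstReg_GprFirst + X86_GREG_xAX].pszName, offRax, cbRax));
#endif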
3670
3671
3672/** Host CPU general purpose register names. */
3673DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
3674{
3675#ifdef RT_ARCH_AMD64
3676 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
3677#elif RT_ARCH_ARM64
3678 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
3679 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
3680#else
3681# error "port me"
3682#endif
3683};
3684
3685
3686DECL_FORCE_INLINE(uint8_t) iemNativeRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, unsigned idxReg,
3687 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
3688{
3689 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3690
3691 pReNative->Core.aHstRegs[idxReg].enmWhat = enmWhat;
3692 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3693 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
3694 return (uint8_t)idxReg;
3695}
3696
3697
3698#if 0 /* unused */
3699/**
3700 * Tries to locate a suitable register in the given register mask.
3701 *
3702 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3703 * failed.
3704 *
3705 * @returns Host register number on success, returns UINT8_MAX on failure.
3706 */
3707static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
3708{
3709 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3710 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3711 if (fRegs)
3712 {
3713 /** @todo pick better here: */
3714 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
3715
3716 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3717 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3718 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3719 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3720
3721 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3722 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3723 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3724 return idxReg;
3725 }
3726 return UINT8_MAX;
3727}
3728#endif /* unused */
3729
3730
3731/**
3732 * Locate a register, possibly freeing one up.
3733 *
3734 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3735 * failed.
3736 *
3737 * @returns Host register number on success. Returns UINT8_MAX if no registers
3738 * found, the caller is supposed to deal with this and raise a
3739 *          found; the caller is supposed to deal with this and raise an
3740 *          allocation-type specific status code (if desired).
3741 *
3742 * @throws  VBox status code if we run into trouble spilling a variable or
3743 * registers, though.
3744 */
3745static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3746 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3747{
3748 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3749 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3750 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3751
3752 /*
3753 * Try a freed register that's shadowing a guest register.
3754 */
3755 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3756 if (fRegs)
3757 {
3758 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3759
3760#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3761 /*
3762         * When we have liveness information, we use it to kick out all shadowed
3763         * guest registers that will not be needed any more in this TB.  If we're
3764 * lucky, this may prevent us from ending up here again.
3765 *
3766 * Note! We must consider the previous entry here so we don't free
3767 * anything that the current threaded function requires (current
3768 * entry is produced by the next threaded function).
3769 */
3770 uint32_t const idxCurCall = pReNative->idxCurCall;
3771 if (idxCurCall > 0)
3772 {
3773 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3774
3775# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3776 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3777 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3778            uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
3779#else
3780 /* Construct a mask of the registers not in the read or write state.
3781               Note! We could skip writes, if they aren't from us, as this is just
3782 a hack to prevent trashing registers that have just been written
3783 or will be written when we retire the current instruction. */
3784 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3785 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3786 & IEMLIVENESSBIT_MASK;
3787#endif
3788 /* Merge EFLAGS. */
3789 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3790 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3791 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3792 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3793 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
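            /* Worked example of the merge above: with the seven EFLAGS liveness bits laid out as
               Other,CF,PF,AF,ZF,SF,OF starting at kIemNativeGstReg_EFlags (see the step comments),
               the three shift+AND steps AND all seven bits together into the kIemNativeGstReg_EFlags
               position, so EFLAGS only ends up in the free mask when every sub-flag is freeable. */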
3794
3795 /* If it matches any shadowed registers. */
3796 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3797 {
3798 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3799 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3800 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3801
3802 /* See if we've got any unshadowed registers we can return now. */
3803 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3804 if (fUnshadowedRegs)
3805 {
3806 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3807 return (fPreferVolatile
3808 ? ASMBitFirstSetU32(fUnshadowedRegs)
3809 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3810 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3811 - 1;
3812 }
3813 }
3814 }
3815#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3816
3817 unsigned const idxReg = (fPreferVolatile
3818 ? ASMBitFirstSetU32(fRegs)
3819 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3820 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3821 - 1;
3822
3823 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3824 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3825 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3826 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3827
3828 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3829 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3830 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3831 return idxReg;
3832 }
3833
3834 /*
3835 * Try free up a variable that's in a register.
3836 *
3837 * We do two rounds here, first evacuating variables we don't need to be
3838 * saved on the stack, then in the second round moving things to the stack.
3839 */
3840 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3841 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3842 {
3843 uint32_t fVars = pReNative->Core.bmVars;
3844 while (fVars)
3845 {
3846 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3847 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3848 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3849 && (RT_BIT_32(idxReg) & fRegMask)
3850 && ( iLoop == 0
3851 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3852 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3853 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3854 {
3855 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3856 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3857 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3858 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3859 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3860 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3861
3862 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3863 {
3864 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3865 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3866 }
3867
3868 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3869 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3870
3871 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3872 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3873 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3874 return idxReg;
3875 }
3876 fVars &= ~RT_BIT_32(idxVar);
3877 }
3878 }
3879
3880 return UINT8_MAX;
3881}
3882
3883
3884/**
3885 * Reassigns a variable to a different register specified by the caller.
3886 *
3887 * @returns The new code buffer position.
3888 * @param pReNative The native recompile state.
3889 * @param off The current code buffer position.
3890 * @param idxVar The variable index.
3891 * @param idxRegOld The old host register number.
3892 * @param idxRegNew The new host register number.
3893 * @param pszCaller The caller for logging.
3894 */
3895static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3896 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3897{
3898 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3899 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3900 RT_NOREF(pszCaller);
3901
3902 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3903
3904 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3905 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3906 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3908
3909 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3910 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3911 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3912 if (fGstRegShadows)
3913 {
3914 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3915 | RT_BIT_32(idxRegNew);
3916 while (fGstRegShadows)
3917 {
3918 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3919 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3920
3921 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3922 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3923 }
3924 }
3925
3926 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3927 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3928 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3929 return off;
3930}
3931
3932
3933/**
3934 * Moves a variable to a different register or spills it onto the stack.
3935 *
3936 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3937 * kinds can easily be recreated if needed later.
3938 *
3939 * @returns The new code buffer position.
3940 * @param pReNative The native recompile state.
3941 * @param off The current code buffer position.
3942 * @param idxVar The variable index.
3943 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3944 * call-volatile registers.
3945 */
3946static uint32_t iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3947 uint32_t fForbiddenRegs = IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3948{
3949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3950 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3951 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3952 Assert(!pVar->fRegAcquired);
3953
3954 uint8_t const idxRegOld = pVar->idxReg;
3955 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3956 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3957 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3958 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3959 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3960 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3961 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3962 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3963
3964
3965 /** @todo Add statistics on this.*/
3966 /** @todo Implement basic variable liveness analysis (python) so variables
3967 * can be freed immediately once no longer used. This has the potential to
3968      * can be freed immediately once no longer used.  Without this we potentially
3969      * trash registers and stack space on dead variables.
3970
3971 /*
3972 * First try move it to a different register, as that's cheaper.
3973 */
3974 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3975 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3976 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3977 if (fRegs)
3978 {
3979 /* Avoid using shadow registers, if possible. */
3980 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3981 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3982 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3983 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3984 }
3985
3986 /*
3987 * Otherwise we must spill the register onto the stack.
3988 */
3989 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3990 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3991 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3992 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3993
3994 pVar->idxReg = UINT8_MAX;
3995 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3996 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3997 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3998 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3999 return off;
4000}
4001
4002
4003/**
4004 * Allocates a temporary host general purpose register.
4005 *
4006 * This may emit code to save register content onto the stack in order to free
4007 * up a register.
4008 *
4009 * @returns The host register number; throws VBox status code on failure,
4010 * so no need to check the return value.
4011 * @param pReNative The native recompile state.
4012 * @param poff Pointer to the variable with the code buffer position.
4013 *                      This will be updated if we need to move a variable from
4014 * register to stack in order to satisfy the request.
4015 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4016 * registers (@c true, default) or the other way around
4017 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4018 */
4019DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
4020{
4021 /*
4022 * Try find a completely unused register, preferably a call-volatile one.
4023 */
4024 uint8_t idxReg;
4025 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4026 & ~pReNative->Core.bmHstRegsWithGstShadow
4027 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
4028 if (fRegs)
4029 {
4030 if (fPreferVolatile)
4031 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4032 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4033 else
4034 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4035 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4036 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4037 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4038 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4039 }
4040 else
4041 {
4042 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
4043 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4044 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4045 }
4046 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4047}
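/*
 * A minimal usage sketch (illustrative only; iemNativeRegFreeTmp is assumed to be the
 * matching release helper and is not part of this excerpt): allocate a scratch GPR, emit
 * code that uses it, then hand it back.
 */
#if 0 /* illustrative only */
uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234)); /* example use */
iemNativeRegFreeTmp(pReNative, idxTmpReg);
#endif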
4048
4049
4050/**
4051 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
4052 * registers.
4053 *
4054 * @returns The host register number; throws VBox status code on failure,
4055 * so no need to check the return value.
4056 * @param pReNative The native recompile state.
4057 * @param poff Pointer to the variable with the code buffer position.
4058 *                      This will be updated if we need to move a variable from
4059 * register to stack in order to satisfy the request.
4060 * @param fRegMask Mask of acceptable registers.
4061 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4062 * registers (@c true, default) or the other way around
4063 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
4064 */
4065DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
4066 bool fPreferVolatile /*= true*/)
4067{
4068 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
4069 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
4070
4071 /*
4072 * Try find a completely unused register, preferably a call-volatile one.
4073 */
4074 uint8_t idxReg;
4075 uint32_t fRegs = ~pReNative->Core.bmHstRegs
4076 & ~pReNative->Core.bmHstRegsWithGstShadow
4077 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
4078 & fRegMask;
4079 if (fRegs)
4080 {
4081 if (fPreferVolatile)
4082 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
4083 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4084 else
4085 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4086 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
4087 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4088 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4089 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
4090 }
4091 else
4092 {
4093 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
4094 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
4095 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
4096 }
4097 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
4098}
4099
4100
4101/**
4102 * Allocates a temporary register for loading an immediate value into.
4103 *
4104 * This will emit code to load the immediate, unless there happens to be an
4105 * unused register with the value already loaded.
4106 *
4107 * The caller must not modify the returned register; it must be considered
4108 * read-only. Free using iemNativeRegFreeTmpImm.
4109 *
4110 * @returns The host register number; throws VBox status code on failure, so no
4111 * need to check the return value.
4112 * @param pReNative The native recompile state.
4113 * @param poff Pointer to the variable with the code buffer position.
4114 * @param uImm The immediate value that the register must hold upon
4115 * return.
4116 * @param fPreferVolatile Whether to prefer volatile over non-volatile
4117 * registers (@c true, default) or the other way around
4118 * (@c false).
4119 *
4120 * @note Reusing immediate values has not been implemented yet.
4121 */
4122DECL_HIDDEN_THROW(uint8_t)
4123iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
4124{
4125 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
4126 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
4127 return idxReg;
4128}
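/*
 * A minimal usage sketch for the immediate variant (illustrative only): the register is
 * treated as read-only and released with iemNativeRegFreeTmpImm as noted above.
 */
#if 0 /* illustrative only */
uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
/* ... use idxRegImm as a read-only source operand ... */
iemNativeRegFreeTmpImm(pReNative, idxRegImm);
#endif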
4129
4130#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4131
4132# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4133/**
4134 * Helper for iemNativeLivenessGetStateByGstReg.
4135 *
4136 * @returns IEMLIVENESS_STATE_XXX
4137 * @param fMergedStateExp2 This is the RT_BIT_32() of each sub-state
4138 * ORed together.
4139 */
4140DECL_FORCE_INLINE(uint32_t)
4141iemNativeLivenessMergeExpandedEFlagsState(uint32_t fMergedStateExp2)
4142{
4143 /* INPUT trumps anything else. */
4144 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_INPUT))
4145 return IEMLIVENESS_STATE_INPUT;
4146
4147 /* CLOBBERED trumps XCPT_OR_CALL and UNUSED. */
4148 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_CLOBBERED))
4149 {
4150 /* If not all sub-fields are clobbered they must be considered INPUT. */
4151 if (fMergedStateExp2 & (RT_BIT_32(IEMLIVENESS_STATE_UNUSED) | RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL)))
4152 return IEMLIVENESS_STATE_INPUT;
4153 return IEMLIVENESS_STATE_CLOBBERED;
4154 }
4155
4156 /* XCPT_OR_CALL trumps UNUSED. */
4157 if (fMergedStateExp2 & RT_BIT_32(IEMLIVENESS_STATE_XCPT_OR_CALL))
4158 return IEMLIVENESS_STATE_XCPT_OR_CALL;
4159
4160 return IEMLIVENESS_STATE_UNUSED;
4161}
4162# endif /* !IEMLIVENESS_EXTENDED_LAYOUT */
4163
4164
4165DECL_FORCE_INLINE(uint32_t)
4166iemNativeLivenessGetStateByGstRegEx(PCIEMLIVENESSENTRY pLivenessEntry, unsigned enmGstRegEx)
4167{
4168# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4169 return ((pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4170 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2);
4171# else
4172 return ( (pLivenessEntry->Bit0.bm64 >> enmGstRegEx) & 1)
4173 | (((pLivenessEntry->Bit1.bm64 >> enmGstRegEx) << 1) & 2)
4174 | (((pLivenessEntry->Bit2.bm64 >> enmGstRegEx) << 2) & 4)
4175          | (((pLivenessEntry->Bit3.bm64 >> enmGstRegEx) << 3) & 8);
4176# endif
4177}
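/*
 * Worked example for the bit gathering above (compact layout): if only Bit0 has the bit for
 * enmGstRegEx set, the function returns 1 (IEMLIVENESS_STATE_UNUSED); if only Bit1 has it
 * set, it returns 2 (IEMLIVENESS_STATE_XCPT_OR_CALL), matching the AssertCompile in
 * iemNativeRegAllocFindFree above.  The extended layout simply adds Bit2/Bit3 as bits 2 and 3.
 */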
4178
4179
4180DECL_FORCE_INLINE(uint32_t)
4181iemNativeLivenessGetStateByGstReg(PCIEMLIVENESSENTRY pLivenessEntry, IEMNATIVEGSTREG enmGstReg)
4182{
4183 uint32_t uRet = iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, (unsigned)enmGstReg);
4184 if (enmGstReg == kIemNativeGstReg_EFlags)
4185 {
4186 /* Merge the eflags states to one. */
4187# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4188 uRet = RT_BIT_32(uRet);
4189 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflCf | (pLivenessEntry->Bit1.fEflCf << 1));
4190 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflPf | (pLivenessEntry->Bit1.fEflPf << 1));
4191 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflAf | (pLivenessEntry->Bit1.fEflAf << 1));
4192 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflZf | (pLivenessEntry->Bit1.fEflZf << 1));
4193 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflSf | (pLivenessEntry->Bit1.fEflSf << 1));
4194 uRet |= RT_BIT_32(pLivenessEntry->Bit0.fEflOf | (pLivenessEntry->Bit1.fEflOf << 1));
4195 uRet = iemNativeLivenessMergeExpandedEFlagsState(uRet);
4196# else
4197 AssertCompile(IEMLIVENESSBIT_IDX_EFL_OTHER == (unsigned)kIemNativeGstReg_EFlags);
4198 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_CF);
4199 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_PF);
4200 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_AF);
4201 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_ZF);
4202 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_SF);
4203 uRet |= iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, IEMLIVENESSBIT_IDX_EFL_OF);
4204# endif
4205 }
4206 return uRet;
4207}
4208
4209
4210# ifdef VBOX_STRICT
4211/** For assertions only; the caller must check that idxCurCall isn't zero. */
4212DECL_FORCE_INLINE(uint32_t)
4213iemNativeLivenessGetPrevStateByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg)
4214{
4215 return iemNativeLivenessGetStateByGstReg(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], enmGstReg);
4216}
4217# endif /* VBOX_STRICT */
4218
4219#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4220
4221/**
4222 * Marks host register @a idxHstReg as containing a shadow copy of guest
4223 * register @a enmGstReg.
4224 *
4225 * ASSUMES that caller has made sure @a enmGstReg is not associated with any
4226 * host register before calling.
4227 */
4228DECL_FORCE_INLINE(void)
4229iemNativeRegMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4230{
4231 Assert(!(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg)));
4232 Assert(!pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4233 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
4234
4235 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxHstReg;
4236 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = RT_BIT_64(enmGstReg); /** @todo why? not OR? */
4237 pReNative->Core.bmGstRegShadows |= RT_BIT_64(enmGstReg);
4238 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
4239#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4240 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4241 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxHstReg);
4242#else
4243 RT_NOREF(off);
4244#endif
4245}
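/*
 * A small illustration of the bookkeeping above: after marking, say, host register 3 as the
 * shadow of kIemNativeGstReg_Pc, aidxGstRegShadows[kIemNativeGstReg_Pc] == 3, the
 * kIemNativeGstReg_Pc bit is set in both bmGstRegShadows and aHstRegs[3].fGstRegShadows, and
 * bit 3 is set in bmHstRegsWithGstShadow - exactly the invariants asserted by the clearing
 * and transfer helpers below.
 */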
4246
4247
4248/**
4249 * Clear any guest register shadow claims from @a idxHstReg.
4250 *
4251 * The register does not need to be shadowing any guest registers.
4252 */
4253DECL_FORCE_INLINE(void)
4254iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off)
4255{
4256 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4257 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4258 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4259 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4260 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4261
4262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4263 uint64_t fGstRegs = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4264 if (fGstRegs)
4265 {
4266 Assert(fGstRegs < RT_BIT_64(kIemNativeGstReg_End));
4267 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4268 while (fGstRegs)
4269 {
4270 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4271 fGstRegs &= ~RT_BIT_64(iGstReg);
4272 iemNativeDbgInfoAddGuestRegShadowing(pReNative, (IEMNATIVEGSTREG)iGstReg, UINT8_MAX, idxHstReg);
4273 }
4274 }
4275#else
4276 RT_NOREF(off);
4277#endif
4278
4279 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4280 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4281 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4282}
4283
4284
4285/**
4286 * Clear guest register shadow claim regarding @a enmGstReg from @a idxHstReg
4287 * and global overview flags.
4288 */
4289DECL_FORCE_INLINE(void)
4290iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4291{
4292 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4293 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4294 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows
4295 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4296 Assert(pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg));
4297 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4298 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4299
4300#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4301 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4302 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, UINT8_MAX, idxHstReg);
4303#else
4304 RT_NOREF(off);
4305#endif
4306
4307 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4308 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4309 if (!fGstRegShadowsNew)
4310 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4311 pReNative->Core.bmGstRegShadows &= ~RT_BIT_64(enmGstReg);
4312}
4313
4314
4315#if 0 /* unused */
4316/**
4317 * Clear any guest register shadow claim for @a enmGstReg.
4318 */
4319DECL_FORCE_INLINE(void)
4320iemNativeRegClearGstRegShadowingByGstReg(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg, uint32_t off)
4321{
4322 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4323 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4324 {
4325 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] < RT_ELEMENTS(pReNative->Core.aHstRegs));
4326 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4327 }
4328}
4329#endif
4330
4331
4332/**
4333 * Clear any guest register shadow claim for @a enmGstReg and mark @a idxHstRegNew
4334 * as the new shadow of it.
4335 *
4336 * Unlike the other guest reg shadow helpers, this does the logging for you.
4337 * However, the liveness state is not asserted here; the caller must do
4338 * that.
4339 */
4340DECL_FORCE_INLINE(void)
4341iemNativeRegClearAndMarkAsGstRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstRegNew,
4342 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4343{
4344 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4345 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4346 {
4347 uint8_t const idxHstRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
4348 Assert(idxHstRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
4349 if (idxHstRegOld == idxHstRegNew)
4350 return;
4351 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s (from %s)\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4352 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstRegOld]));
4353 iemNativeRegClearGstRegShadowingOne(pReNative, pReNative->Core.aidxGstRegShadows[enmGstReg], enmGstReg, off);
4354 }
4355 else
4356 Log12(("iemNativeRegClearAndMarkAsGstRegShadow: %s for guest %s\n", g_apszIemNativeHstRegNames[idxHstRegNew],
4357 g_aGstShadowInfo[enmGstReg].pszName));
4358 iemNativeRegMarkAsGstRegShadow(pReNative, idxHstRegNew, enmGstReg, off);
4359}
4360
4361
4362/**
4363 * Transfers the guest register shadow claims of @a enmGstReg from @a idxRegFrom
4364 * to @a idxRegTo.
4365 */
4366DECL_FORCE_INLINE(void)
4367iemNativeRegTransferGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxRegFrom, uint8_t idxRegTo,
4368 IEMNATIVEGSTREG enmGstReg, uint32_t off)
4369{
4370 Assert(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & RT_BIT_64(enmGstReg));
4371 Assert(pReNative->Core.aidxGstRegShadows[enmGstReg] == idxRegFrom);
4372 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows)
4373 == pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows
4374 && pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4375 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows)
4376 == pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows);
4377 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegFrom))
4378 == RT_BOOL(pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows));
4379
4380 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstReg);
4381 pReNative->Core.aHstRegs[idxRegFrom].fGstRegShadows = fGstRegShadowsFrom;
4382 if (!fGstRegShadowsFrom)
4383 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegFrom);
4384 pReNative->Core.bmHstRegsWithGstShadow |= RT_BIT_32(idxRegTo);
4385 pReNative->Core.aHstRegs[idxRegTo].fGstRegShadows |= RT_BIT_64(enmGstReg);
4386 pReNative->Core.aidxGstRegShadows[enmGstReg] = idxRegTo;
4387#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4388 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4389 iemNativeDbgInfoAddGuestRegShadowing(pReNative, enmGstReg, idxRegTo, idxRegFrom);
4390#else
4391 RT_NOREF(off);
4392#endif
4393}
4394
4395
4396/**
4397 * Allocates a temporary host general purpose register for keeping a guest
4398 * register value.
4399 *
4400 * Since we may already have a register holding the guest register value,
4401 * code will be emitted to do the loading if that's not the case. Code may also
4402 * be emitted if we have to free up a register to satisfy the request.
4403 *
4404 * @returns The host register number; throws VBox status code on failure, so no
4405 * need to check the return value.
4406 * @param pReNative The native recompile state.
4407 * @param poff Pointer to the variable with the code buffer
4408 * position. This will be updated if we need to move a
4409 * variable from register to stack in order to satisfy
4410 * the request.
4411 * @param enmGstReg The guest register that is to be updated.
4412 * @param enmIntendedUse How the caller will be using the host register.
4413 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
4414 * register is okay (default). The ASSUMPTION here is
4415 * that the caller has already flushed all volatile
4416 * registers, so this is only applied if we allocate a
4417 * new register.
4418 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
4419 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
4420 */
4421DECL_HIDDEN_THROW(uint8_t)
4422iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
4423 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
4424 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
4425{
4426 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4427#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4428 AssertMsg( fSkipLivenessAssert
4429 || pReNative->idxCurCall == 0
4430 || enmGstReg == kIemNativeGstReg_Pc
4431 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
4432 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4433 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
4434 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4435 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
4436 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4437#endif
4438 RT_NOREF(fSkipLivenessAssert);
4439#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
4440 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
4441#endif
4442 uint32_t const fRegMask = !fNoVolatileRegs
4443 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
4444 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4445
4446 /*
4447 * First check if the guest register value is already in a host register.
4448 */
4449 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4450 {
4451 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4452 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4453 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4454 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4455
4456 /* It's not supposed to be allocated... */
4457 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4458 {
4459 /*
4460 * If the register will trash the guest shadow copy, try find a
4461 * completely unused register we can use instead. If that fails,
4462 * we need to disassociate the host reg from the guest reg.
4463 */
4464 /** @todo would be nice to know if preserving the register is in any way helpful. */
4465 /* If the purpose is calculations, try duplicate the register value as
4466 we'll be clobbering the shadow. */
4467 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
4468 && ( ~pReNative->Core.bmHstRegs
4469 & ~pReNative->Core.bmHstRegsWithGstShadow
4470 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
4471 {
4472 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
4473
4474 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4475
4476 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4477 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4478 g_apszIemNativeHstRegNames[idxRegNew]));
4479 idxReg = idxRegNew;
4480 }
4481 /* If the current register matches the restrictions, go ahead and allocate
4482 it for the caller. */
4483 else if (fRegMask & RT_BIT_32(idxReg))
4484 {
4485 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4486 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4487 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4488 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4489 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
4490 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4491 else
4492 {
4493 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
4494 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
4495 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4496 }
4497 }
4498 /* Otherwise, allocate a register that satisfies the caller and transfer
4499 the shadowing if compatible with the intended use. (This basically
4500 means the caller wants a non-volatile register (RSP push/pop scenario).) */
4501 else
4502 {
4503 Assert(fNoVolatileRegs);
4504 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
4505 !fNoVolatileRegs
4506 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
4507 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4508 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4509 {
4510 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4511 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
4512 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
4513 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4514 }
4515 else
4516 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
4517 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4518 g_apszIemNativeHstRegNames[idxRegNew]));
4519 idxReg = idxRegNew;
4520 }
4521 }
4522 else
4523 {
4524 /*
4525 * Oops. Shadowed guest register already allocated!
4526 *
4527 * Allocate a new register, copy the value and, if updating, the
4528 * guest shadow copy assignment to the new register.
4529 */
4530 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4531 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
4532 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
4533 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
4534
4535 /** @todo share register for readonly access. */
4536 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
4537 enmIntendedUse == kIemNativeGstRegUse_Calculation);
4538
4539 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4540 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
4541
4542 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
4543 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4544 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
4545 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4546 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4547 else
4548 {
4549 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
4550 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
4551 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
4552 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
4553 }
4554 idxReg = idxRegNew;
4555 }
4556 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
4557
4558#ifdef VBOX_STRICT
4559 /* Strict builds: Check that the value is correct. */
4560 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4561#endif
4562
4563 return idxReg;
4564 }
4565
4566 /*
4567 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register (unless the use is a destructive calculation).
4568 */
4569 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
4570
4571 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
4572 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
4573
4574 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
4575 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
4576 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
4577 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
4578
4579 return idxRegNew;
4580}
4581
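/* Usage sketch (illustrative): the delayed PC writeback further down in this
 * file follows this pattern: allocate the shadow for update, emit the
 * arithmetic and the store, then release the temporary register:
 *
 *      uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
 *                                                               kIemNativeGstRegUse_ForUpdate);
 *      off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
 *      off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
 *      iemNativeRegFreeTmp(pReNative, idxPcReg);
 */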
4582
4583/**
4584 * Allocates a temporary host general purpose register that already holds the
4585 * given guest register value.
4586 *
4587 * The use case for this function is places where the shadowing state cannot be
4588 * modified due to branching and such. This will fail if we don't have a
4589 * current shadow copy handy or if it's incompatible. The only code that will
4590 * be emitted here is value checking code in strict builds.
4591 *
4592 * The intended use can only be readonly!
4593 *
4594 * @returns The host register number, UINT8_MAX if not present.
4595 * @param pReNative The native recompile state.
4596 * @param poff Pointer to the instruction buffer offset.
4597 * Will be updated in strict builds if a register is
4598 * found.
4599 * @param enmGstReg The guest register that is to be accessed (read-only).
4600 * @note In strict builds, this may throw instruction buffer growth failures.
4601 * Non-strict builds will not throw anything.
4602 * @sa iemNativeRegAllocTmpForGuestReg
4603 */
4604DECL_HIDDEN_THROW(uint8_t)
4605iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4606{
4607 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4608#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4609 AssertMsg( pReNative->idxCurCall == 0
4610 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4611 || enmGstReg == kIemNativeGstReg_Pc,
4612 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4613#endif
4614
4615 /*
4616 * First check if the guest register value is already in a host register.
4617 */
4618 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4619 {
4620 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4621 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4622 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4623 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4624
4625 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4626 {
4627 /*
4628 * We only do readonly use here, so easy compared to the other
4629 * variant of this code.
4630 */
4631 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4632 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4633 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4634 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4635 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4636
4637#ifdef VBOX_STRICT
4638 /* Strict builds: Check that the value is correct. */
4639 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4640#else
4641 RT_NOREF(poff);
4642#endif
4643 return idxReg;
4644 }
4645 }
4646
4647 return UINT8_MAX;
4648}
4649
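/* Usage sketch (illustrative, hypothetical caller): since this variant can
 * return UINT8_MAX, callers need a fallback that does not touch the shadowing
 * state, e.g. loading the value into a plain temporary:
 *
 *      uint8_t idxRegPc = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
 *      if (idxRegPc == UINT8_MAX)
 *      {
 *          idxRegPc = iemNativeRegAllocTmp(pReNative, &off);  // assumed helper
 *          off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxRegPc, kIemNativeGstReg_Pc);
 *      }
 *      // ... read-only use of idxRegPc ...
 *      iemNativeRegFreeTmp(pReNative, idxRegPc);
 */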
4650
4651/**
4652 * Allocates argument registers for a function call.
4653 *
4654 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4655 * need to check the return value.
4656 * @param pReNative The native recompile state.
4657 * @param off The current code buffer offset.
4658 * @param cArgs The number of arguments the function call takes.
4659 */
4660DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4661{
4662 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4663 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4664 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4665 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4666
4667 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4668 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4669 else if (cArgs == 0)
4670 return off;
4671
4672 /*
4673 * Do we get lucky and all registers are free and not shadowing anything?
4674 */
4675 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4676 for (uint32_t i = 0; i < cArgs; i++)
4677 {
4678 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4679 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4680 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4681 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4682 }
4683 /*
4684 * Okay, not lucky so we have to free up the registers.
4685 */
4686 else
4687 for (uint32_t i = 0; i < cArgs; i++)
4688 {
4689 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4690 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4691 {
4692 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4693 {
4694 case kIemNativeWhat_Var:
4695 {
4696 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4698 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4699 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4700 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4701
4702 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4703 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4704 else
4705 {
4706 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4707 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4708 }
4709 break;
4710 }
4711
4712 case kIemNativeWhat_Tmp:
4713 case kIemNativeWhat_Arg:
4714 case kIemNativeWhat_rc:
4715 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4716 default:
4717 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4718 }
4719
4720 }
4721 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4722 {
4723 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4724 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4725 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4726 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4727 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4728 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4729 }
4730 else
4731 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4732 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4733 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4734 }
4735 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4736 return off;
4737}
4738
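/* Usage sketch (illustrative, hypothetical call emitter): reserve the
 * argument registers before loading them and emitting the call; the argument
 * register macro, the call emitter and the helper below are assumptions:
 *
 *      off = iemNativeRegAllocArgs(pReNative, off, 2);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxValueReg);  // assumed macro
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);                      // assumed emitter
 */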
4739
4740DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4741
4742
4743#if 0
4744/**
4745 * Frees a register assignment of any type.
4746 *
4747 * @param pReNative The native recompile state.
4748 * @param idxHstReg The register to free.
4749 *
4750 * @note Does not update variables.
4751 */
4752DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4753{
4754 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4755 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4756 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4757 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4758 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4759 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4760 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4761 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4762 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4763 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4764 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4765 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4766 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4767 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4768
4769 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4770 /* no flushing, right:
4771 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4772 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4773 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4774 */
4775}
4776#endif
4777
4778
4779/**
4780 * Frees a temporary register.
4781 *
4782 * Any shadow copies of guest registers assigned to the host register will not
4783 * be flushed by this operation.
4784 */
4785DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4786{
4787 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4788 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4789 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4790 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4791 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4792}
4793
4794
4795/**
4796 * Frees a temporary immediate register.
4797 *
4798 * It is assumed that the caller has not modified the register, so it still holds
4799 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4800 */
4801DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4802{
4803 iemNativeRegFreeTmp(pReNative, idxHstReg);
4804}
4805
4806
4807/**
4808 * Frees a register assigned to a variable.
4809 *
4810 * The register will be disassociated from the variable.
4811 */
4812DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4813{
4814 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4815 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4816 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4817 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4818 Assert(pReNative->Core.aVars[idxVar].idxReg == idxHstReg);
4819
4820 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4821 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4822 if (!fFlushShadows)
4823 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4824 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4825 else
4826 {
4827 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4828 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4829 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4830 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4831 uint64_t fGstRegShadows = fGstRegShadowsOld;
4832 while (fGstRegShadows)
4833 {
4834 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4835 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4836
4837 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4838 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4839 }
4840 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4841 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4842 }
4843}
4844
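/* Usage sketch (illustrative): the fFlushShadows parameter decides whether
 * any guest shadow copies in the register survive the release; the variable
 * bookkeeping around such a call is assumed:
 *
 *      iemNativeRegFreeVar(pReNative, idxHstReg, false);   // keep the guest shadow copies
 *   or
 *      iemNativeRegFreeVar(pReNative, idxHstReg, true);    // drop the guest shadow copies as well
 */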
4845
4846/**
4847 * Called right before emitting a call instruction to move anything important
4848 * out of call-volatile registers, free and flush the call-volatile registers,
4849 * optionally freeing argument variables.
4850 *
4851 * @returns New code buffer offset, UINT32_MAX on failure.
4852 * @param pReNative The native recompile state.
4853 * @param off The code buffer offset.
4854 * @param cArgs The number of arguments the function call takes.
4855 * It is presumed that the host register part of these has
4856 * been allocated as such already and won't need moving,
4857 * just freeing.
4858 * @param fKeepVars Mask of variables that should keep their register
4859 * assignments. Caller must take care to handle these.
4860 */
4861DECL_HIDDEN_THROW(uint32_t)
4862iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4863{
4864 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4865
4866 /* fKeepVars will reduce this mask. */
4867 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4868
4869 /*
4870 * Move anything important out of volatile registers.
4871 */
4872 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4873 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4874 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_GREG_MASK
4875#ifdef IEMNATIVE_REG_FIXED_TMP0
4876 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
4877#endif
4878#ifdef IEMNATIVE_REG_FIXED_TMP1
4879 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
4880#endif
4881#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4882 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4883#endif
4884 & ~g_afIemNativeCallRegs[cArgs];
4885
4886 fRegsToMove &= pReNative->Core.bmHstRegs;
4887 if (!fRegsToMove)
4888 { /* likely */ }
4889 else
4890 {
4891 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4892 while (fRegsToMove != 0)
4893 {
4894 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4895 fRegsToMove &= ~RT_BIT_32(idxReg);
4896
4897 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4898 {
4899 case kIemNativeWhat_Var:
4900 {
4901 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4902 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4903 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4904 Assert(pVar->idxReg == idxReg);
4905 if (!(RT_BIT_32(idxVar) & fKeepVars))
4906 {
4907 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4908 idxVar, pVar->enmKind, pVar->idxReg));
4909 if (pVar->enmKind != kIemNativeVarKind_Stack)
4910 pVar->idxReg = UINT8_MAX;
4911 else
4912 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4913 }
4914 else
4915 fRegsToFree &= ~RT_BIT_32(idxReg);
4916 continue;
4917 }
4918
4919 case kIemNativeWhat_Arg:
4920 AssertMsgFailed(("What?!?: %u\n", idxReg));
4921 continue;
4922
4923 case kIemNativeWhat_rc:
4924 case kIemNativeWhat_Tmp:
4925 AssertMsgFailed(("Missing free: %u\n", idxReg));
4926 continue;
4927
4928 case kIemNativeWhat_FixedTmp:
4929 case kIemNativeWhat_pVCpuFixed:
4930 case kIemNativeWhat_pCtxFixed:
4931 case kIemNativeWhat_PcShadow:
4932 case kIemNativeWhat_FixedReserved:
4933 case kIemNativeWhat_Invalid:
4934 case kIemNativeWhat_End:
4935 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4936 }
4937 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4938 }
4939 }
4940
4941 /*
4942 * Do the actual freeing.
4943 */
4944 if (pReNative->Core.bmHstRegs & fRegsToFree)
4945 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4946 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4947 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4948
4949 /* If there are guest register shadows in any call-volatile register, we
4950 have to clear the corresponding guest register masks for each register. */
4951 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4952 if (fHstRegsWithGstShadow)
4953 {
4954 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4955 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK, fHstRegsWithGstShadow));
4956 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4957 do
4958 {
4959 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4960 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4961
4962 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4963 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4964 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4965 } while (fHstRegsWithGstShadow != 0);
4966 }
4967
4968 return off;
4969}
4970
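/* Illustrative call sequence (hypothetical helper): evacuate and free the
 * call-volatile registers, load the argument registers, then emit the call;
 * the call emitter and helper name are assumptions:
 *
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2);
 *      // ... load IEMNATIVE_CALL_ARG0_GREG / IEMNATIVE_CALL_ARG1_GREG as needed ...
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);   // assumed emitter
 */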
4971
4972/**
4973 * Flushes a set of guest register shadow copies.
4974 *
4975 * This is usually done after calling a threaded function or a C-implementation
4976 * of an instruction.
4977 *
4978 * @param pReNative The native recompile state.
4979 * @param fGstRegs Set of guest registers to flush.
4980 */
4981DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4982{
4983 /*
4984 * Reduce the mask by what's currently shadowed
4985 */
4986 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4987 fGstRegs &= bmGstRegShadowsOld;
4988 if (fGstRegs)
4989 {
4990 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4991 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4992 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4993 if (bmGstRegShadowsNew)
4994 {
4995 /*
4996 * Partial.
4997 */
4998 do
4999 {
5000 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5001 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5002 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5003 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5004 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5005
5006 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
5007 fGstRegs &= ~fInThisHstReg;
5008 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5009 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5010 if (!fGstRegShadowsNew)
5011 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5012 } while (fGstRegs != 0);
5013 }
5014 else
5015 {
5016 /*
5017 * Clear all.
5018 */
5019 do
5020 {
5021 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5022 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5023 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5024 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
5025 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5026
5027 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5028 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5029 } while (fGstRegs != 0);
5030 pReNative->Core.bmHstRegsWithGstShadow = 0;
5031 }
5032 }
5033}
5034
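/* Usage sketch (illustrative): after a call that may have modified guest
 * state behind our back, the stale shadow copies are dropped by mask; the
 * exact mask is caller specific and UINT64_MAX simply drops everything that
 * is currently shadowed:
 *
 *      iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_Pc));
 *   or
 *      iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 */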
5035
5036/**
5037 * Flushes guest register shadow copies held by a set of host registers.
5038 *
5039 * This is used with the TLB lookup code for ensuring that we don't carry on
5040 * with any guest shadows in volatile registers, as these will get corrupted by
5041 * a TLB miss.
5042 *
5043 * @param pReNative The native recompile state.
5044 * @param fHstRegs Set of host registers to flush guest shadows for.
5045 */
5046DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
5047{
5048 /*
5049 * Reduce the mask by what's currently shadowed.
5050 */
5051 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
5052 fHstRegs &= bmHstRegsWithGstShadowOld;
5053 if (fHstRegs)
5054 {
5055 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
5056 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5057 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
5058 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
5059 if (bmHstRegsWithGstShadowNew)
5060 {
5061 /*
5062 * Partial (likely).
5063 */
5064 uint64_t fGstShadows = 0;
5065 do
5066 {
5067 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5068 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5069 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5070 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5071
5072 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5073 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5074 fHstRegs &= ~RT_BIT_32(idxHstReg);
5075 } while (fHstRegs != 0);
5076 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
5077 }
5078 else
5079 {
5080 /*
5081 * Clear all.
5082 */
5083 do
5084 {
5085 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5086 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
5087 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
5088 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
5089
5090 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
5091 fHstRegs &= ~RT_BIT_32(idxHstReg);
5092 } while (fHstRegs != 0);
5093 pReNative->Core.bmGstRegShadows = 0;
5094 }
5095 }
5096}
5097
5098
5099/**
5100 * Restores guest shadow copies in volatile registers.
5101 *
5102 * This is used after calling a helper function (think TLB miss) to restore the
5103 * register state of volatile registers.
5104 *
5105 * @param pReNative The native recompile state.
5106 * @param off The code buffer offset.
5107 * @param fHstRegsActiveShadows Set of host registers which are allowed to
5108 * be active (allocated) w/o asserting. Hack.
5109 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
5110 * iemNativeVarRestoreVolatileRegsPostHlpCall()
5111 */
5112DECL_HIDDEN_THROW(uint32_t)
5113iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
5114{
5115 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5116 if (fHstRegs)
5117 {
5118 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
5119 do
5120 {
5121 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
5122
5123 /* It's not fatal if a register is active holding a variable that is
5124 shadowing a guest register, ASSUMING all pending guest register
5125 writes were flushed prior to the helper call. However, we'll be
5126 emitting duplicate restores, so it wastes code space. */
5127 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
5128 RT_NOREF(fHstRegsActiveShadows);
5129
5130 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5131 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
5132 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
5133 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
5134
5135 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
5136 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
5137
5138 fHstRegs &= ~RT_BIT_32(idxHstReg);
5139 } while (fHstRegs != 0);
5140 }
5141 return off;
5142}
5143
5144
5145#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5146# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
5147static uint32_t iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5148{
5149 /* Compare the shadow with the context value, they should match. */
5150 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
5151 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
5152 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
5153 return off;
5154}
5155# endif
5156
5157/**
5158 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
5159 */
5160static uint32_t
5161iemNativeEmitPcWriteback(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5162{
5163 if (pReNative->Core.offPc)
5164 {
5165# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5166 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5167 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, pReNative->Core.cInstrPcUpdateSkipped);
5168# endif
5169
5170# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5171 /* Allocate a temporary PC register. */
5172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5173
5174 /* Perform the addition and store the result. */
5175 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5176 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5177
5178 /* Free but don't flush the PC register. */
5179 iemNativeRegFreeTmp(pReNative, idxPcReg);
5180# else
5181 /* Compare the shadow with the context value, they should match. */
5182 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5183 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5184# endif
5185
5186 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, pReNative->Core.cInstrPcUpdateSkipped);
5187 pReNative->Core.offPc = 0;
5188 pReNative->Core.cInstrPcUpdateSkipped = 0;
5189 }
5190# if 0 /*def IEMNATIVE_WITH_TB_DEBUG_INFO*/
5191 else
5192 {
5193 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5194 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc);
5195 }
5196# endif
5197
5198 return off;
5199}
5200#endif
5201
5202
5203#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5204
5205
5206/*********************************************************************************************************************************
5207* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5208*********************************************************************************************************************************/
5209
5210/**
5211 * Info about shadowed guest SIMD register values.
5212 * @see IEMNATIVEGSTSIMDREG
5213 */
5214static struct
5215{
5216 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5217 uint32_t offXmm;
5218 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5219 uint32_t offYmm;
5220 /** Name (for logging). */
5221 const char *pszName;
5222} const g_aGstSimdShadowInfo[] =
5223{
5224#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5225 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5226 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5227 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5228 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5229 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5230 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5231 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5232 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5233 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5234 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5235 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5236 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5237 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5238 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5239 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5240 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5241 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5242#undef CPUMCTX_OFF_AND_SIZE
5243};
5244AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5245
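/* Illustrative note: the table above is indexed by IEMNATIVEGSTSIMDREG and
 * supplies the CPUMCTX offsets used by the SIMD load/store emitters, e.g.
 * when flushing the low 128 bits of a shadowed register (see
 * iemNativeSimdRegFlushPendingWrite below):
 *
 *      off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg,
 *                                                      g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
 */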
5246
5247#ifdef LOG_ENABLED
5248/** Host CPU SIMD register names. */
5249DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
5250{
5251#ifdef RT_ARCH_AMD64
5252 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
5253#elif defined(RT_ARCH_ARM64)
5254 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
5255 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
5256#else
5257# error "port me"
5258#endif
5259};
5260#endif
5261
5262
5263DECL_FORCE_INLINE(uint8_t) iemNativeSimdRegMarkAllocated(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdReg,
5264 IEMNATIVEWHAT enmWhat, uint8_t idxVar = UINT8_MAX) RT_NOEXCEPT
5265{
5266 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5267
5268 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = enmWhat;
5269 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
5270 RT_NOREF(idxVar);
5271 return idxSimdReg;
5272}
5273
5274
5275/**
5276 * Frees a temporary SIMD register.
5277 *
5278 * Any shadow copies of guest registers assigned to the host register will not
5279 * be flushed by this operation.
5280 */
5281DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5282{
5283 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5284 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5285 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5286 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5287 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5288}
5289
5290
5291/**
5292 * Emits code to flush a pending write of the given SIMD register if any, also flushes the guest to host SIMD register association.
5293 *
5294 * @returns New code buffer offset.
5295 * @param pReNative The native recompile state.
5296 * @param off Current code buffer position.
5297 * @param enmGstSimdReg The guest SIMD register to flush.
5298 */
5299static uint32_t iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5300{
5301 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5302
5303 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5304 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5305 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5306 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5307
5308 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5309 {
5310 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5311 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5312 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5313 }
5314
5315 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5316 {
5317 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5318 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5319 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5320 }
5321
5322 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5323 return off;
5324}
5325
5326
5327/**
5328 * Locate a register, possibly freeing one up.
5329 *
5330 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5331 * failed.
5332 *
5333 * @returns Host register number on success. Returns UINT8_MAX if no registers
5334 * are found; the caller is supposed to deal with this and raise an
5335 * allocation type specific status code (if desired).
5336 *
5337 * @throws VBox status code if we run into trouble spilling a variable or
5338 * recording debug info. Does NOT throw anything if we're out of
5339 * registers, though.
5340 */
5341static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5342 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5343{
5344 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
5345 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5346 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5347
5348 /*
5349 * Try a freed register that's shadowing a guest register.
5350 */
5351 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5352 if (fRegs)
5353 {
5354 //STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
5355
5356#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5357 /*
5358 * When we have liveness information, we use it to kick out all shadowed
5359 * guest registers that will not be needed any more in this TB. If we're
5360 * lucky, this may prevent us from ending up here again.
5361 *
5362 * Note! We must consider the previous entry here so we don't free
5363 * anything that the current threaded function requires (current
5364 * entry is produced by the next threaded function).
5365 */
5366 uint32_t const idxCurCall = pReNative->idxCurCall;
5367 if (idxCurCall > 0)
5368 {
5369 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5370
5371# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5372 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
5373 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
5374 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
5375#else
5376 /* Construct a mask of the registers not in the read or write state.
5377 Note! We could skip writes, if they aren't from us, as this is just
5378 a hack to prevent trashing registers that have just been written
5379 or will be written when we retire the current instruction. */
5380 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5381 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5382 & IEMLIVENESSBIT_MASK;
5383#endif
5384 /* If it matches any shadowed registers. */
5385 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5386 {
5387 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
5388 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5389 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5390
5391 /* See if we've got any unshadowed registers we can return now. */
5392 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5393 if (fUnshadowedRegs)
5394 {
5395 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
5396 return (fPreferVolatile
5397 ? ASMBitFirstSetU32(fUnshadowedRegs)
5398 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5399 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5400 - 1;
5401 }
5402 }
5403 }
5404#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5405
5406 unsigned const idxReg = (fPreferVolatile
5407 ? ASMBitFirstSetU32(fRegs)
5408 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5409 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5410 - 1;
5411
5412 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5413 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5414 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5415 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5416 Assert(pReNative->Core.aHstSimdRegs[idxReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5417
5418 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5419 uint32_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5420 uint32_t idxGstSimdReg = 0;
5421 do
5422 {
5423 if (fGstRegShadows & 0x1)
5424 *poff = iemNativeSimdRegFlushPendingWrite(pReNative, *poff, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5425 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5426 idxGstSimdReg++;
5427 fGstRegShadows >>= 1;
5428 } while (fGstRegShadows);
5429
5430 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5431 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5432 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5433 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5434 return idxReg;
5435 }
5436
5437 /*
5438 * Try free up a variable that's in a register.
5439 *
5440 * We do two rounds here, first evacuating variables that don't need to be
5441 * saved on the stack, then in the second round moving things to the stack.
5442 */
5443 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
5444 AssertReleaseFailed(); /** @todo No variable support right now. */
5445#if 0
5446 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5447 {
5448 uint32_t fVars = pReNative->Core.bmSimdVars;
5449 while (fVars)
5450 {
5451 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5452 uint8_t const idxReg = pReNative->Core.aSimdVars[idxVar].idxReg;
5453 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5454 && (RT_BIT_32(idxReg) & fRegMask)
5455 && ( iLoop == 0
5456 ? pReNative->Core.aSimdVars[idxVar].enmKind != kIemNativeVarKind_Stack
5457 : pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5458 && !pReNative->Core.aSimdVars[idxVar].fRegAcquired)
5459 {
5460 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
5461 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5462 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5463 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5464 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
5465 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5466
5467 if (pReNative->Core.aSimdVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5468 {
5469 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5470 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5471 }
5472
5473 pReNative->Core.aSimdVars[idxVar].idxReg = UINT8_MAX;
5474 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5475
5476 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5477 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5478 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5479 return idxReg;
5480 }
5481 fVars &= ~RT_BIT_32(idxVar);
5482 }
5483 }
5484#endif
5485
5486 AssertFailed();
5487 return UINT8_MAX;
5488}
5489
5490
5491/**
5492 * Marks host SIMD register @a idxHstSimdReg as containing a shadow copy of guest
5493 * SIMD register @a enmGstSimdReg.
5494 *
5495 * ASSUMES that caller has made sure @a enmGstSimdReg is not associated with any
5496 * host register before calling.
5497 */
5498DECL_FORCE_INLINE(void)
5499iemNativeSimdRegMarkAsGstSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5500{
5501 Assert(!(pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg)));
5502 Assert(!pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5503 Assert((unsigned)enmGstSimdReg < (unsigned)kIemNativeGstSimdReg_End);
5504
5505 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxHstSimdReg;
5506 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5507 pReNative->Core.bmGstSimdRegShadows |= RT_BIT_64(enmGstSimdReg);
5508 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxHstSimdReg);
5509#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5510 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5511 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxHstSimdReg);
5512#else
5513 RT_NOREF(off);
5514#endif
5515}
5516
5517
5518/**
5519 * Transfers the guest SIMD register shadow claims of @a enmGstSimdReg from @a idxSimdRegFrom
5520 * to @a idxSimdRegTo.
5521 */
5522DECL_FORCE_INLINE(void)
5523iemNativeSimdRegTransferGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxSimdRegFrom, uint8_t idxSimdRegTo,
5524 IEMNATIVEGSTSIMDREG enmGstSimdReg, uint32_t off)
5525{
5526 Assert(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5527 Assert(pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] == idxSimdRegFrom);
5528 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows)
5529 == pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows
5530 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5531 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows)
5532 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows);
5533 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdRegFrom))
5534 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows));
5535 Assert( pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded
5536 == pReNative->Core.aHstSimdRegs[idxSimdRegTo].enmLoaded);
5537
5538
5539 uint64_t const fGstRegShadowsFrom = pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows & ~RT_BIT_64(enmGstSimdReg);
5540 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].fGstRegShadows = fGstRegShadowsFrom;
5541 if (!fGstRegShadowsFrom)
5542 {
5543 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdRegFrom);
5544 pReNative->Core.aHstSimdRegs[idxSimdRegFrom].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5545 }
5546 pReNative->Core.bmHstSimdRegsWithGstShadow |= RT_BIT_32(idxSimdRegTo);
5547 pReNative->Core.aHstSimdRegs[idxSimdRegTo].fGstRegShadows |= RT_BIT_64(enmGstSimdReg);
5548 pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg] = idxSimdRegTo;
5549#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5550 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5551 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, enmGstSimdReg, idxSimdRegTo, idxSimdRegFrom);
5552#else
5553 RT_NOREF(off);
5554#endif
5555}
5556
5557
5558/**
5559 * Clear any guest register shadow claims from @a idxHstSimdReg.
5560 *
5561 * The register does not need to be shadowing any guest registers.
5562 */
5563DECL_FORCE_INLINE(void)
5564iemNativeSimdRegClearGstSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg, uint32_t off)
5565{
5566 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5567 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows
5568 && pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5569 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstSimdReg))
5570 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5571 Assert( !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyLo128)
5572 && !(pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadowDirtyHi128));
5573
5574#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5575 uint64_t fGstRegs = pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5576 if (fGstRegs)
5577 {
5578 Assert(fGstRegs < RT_BIT_64(kIemNativeGstSimdReg_End));
5579 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5580 while (fGstRegs)
5581 {
5582 unsigned const iGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
5583 fGstRegs &= ~RT_BIT_64(iGstReg);
5584 iemNativeDbgInfoAddGuestSimdRegShadowing(pReNative, (IEMNATIVEGSTSIMDREG)iGstReg, UINT8_MAX, idxHstSimdReg);
5585 }
5586 }
5587#else
5588 RT_NOREF(off);
5589#endif
5590
5591 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstSimdReg);
5592 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5593 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5594 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5595}
5596
5597
5598/**
5599 * Flushes a set of guest register shadow copies.
5600 *
5601 * This is usually done after calling a threaded function or a C-implementation
5602 * of an instruction.
5603 *
5604 * @param pReNative The native recompile state.
5605 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5606 */
5607DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5608{
5609 /*
5610 * Reduce the mask by what's currently shadowed
5611 */
5612 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5613 fGstSimdRegs &= bmGstSimdRegShadows;
5614 if (fGstSimdRegs)
5615 {
5616 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5617 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5618 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5619 if (bmGstSimdRegShadowsNew)
5620 {
5621 /*
5622 * Partial.
5623 */
5624 do
5625 {
5626 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5627 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5628 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5629 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5630 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5631 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5632
5633 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5634 fGstSimdRegs &= ~fInThisHstReg;
5635 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5636 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5637 if (!fGstRegShadowsNew)
5638 {
5639 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5640 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5641 }
5642 } while (fGstSimdRegs != 0);
5643 }
5644 else
5645 {
5646 /*
5647 * Clear all.
5648 */
5649 do
5650 {
5651 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5652 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5653 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5654 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5655 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5656 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5657
5658 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5659 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5660 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5661 } while (fGstSimdRegs != 0);
5662 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5663 }
5664 }
5665}
5666
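/*
 * Illustrative sketch: the partial flush above walks the request mask one
 * guest register at a time via the lowest set bit, and drops every other
 * requested register shadowed by the same host register in the same pass.
 * A standalone analog of that pattern (plain stdint types; the names are
 * made up and __builtin_ctzll stands in for ASMBitFirstSetU64(x) - 1, so
 * this is not recompiler API):
 *
 *      #include <stdint.h>
 *
 *      static void exampleFlushShadows(uint64_t *pbmGstShadows, uint8_t const *paidxHost,
 *                                      uint64_t *pafHostShadows, uint64_t fFlush)
 *      {
 *          fFlush         &= *pbmGstShadows;        // only what is actually shadowed
 *          *pbmGstShadows &= ~fFlush;
 *          while (fFlush)
 *          {
 *              unsigned const idxGst  = (unsigned)__builtin_ctzll(fFlush);
 *              uint8_t  const idxHost = paidxHost[idxGst];
 *              uint64_t const fInHost = (pafHostShadows[idxHost] & fFlush) | (UINT64_C(1) << idxGst);
 *              fFlush                  &= ~fInHost; // everything in this host reg is now handled
 *              pafHostShadows[idxHost] &= ~fInHost;
 *          }
 *      }
 */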
5667
5668/**
5669 * Allocates a temporary host SIMD register.
5670 *
5671 * This may emit code to save register content onto the stack in order to free
5672 * up a register.
5673 *
5674 * @returns The host register number; throws VBox status code on failure,
5675 * so no need to check the return value.
5676 * @param pReNative The native recompile state.
5677 * @param poff Pointer to the variable with the code buffer position.
5678 * This will be updated if we need to move a variable from
5679 * register to stack in order to satisfy the request.
5680 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5681 * registers (@c true, default) or the other way around
5682 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5683 */
5684DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5685{
5686 /*
5687 * Try find a completely unused register, preferably a call-volatile one.
5688 */
5689 uint8_t idxSimdReg;
5690    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5691                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5692 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5693 if (fRegs)
5694 {
5695 if (fPreferVolatile)
5696 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5697 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5698 else
5699 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5700 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5701 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5702 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5703 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5704 }
5705 else
5706 {
5707 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5708 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5709 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5710 }
5711
5712 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5713 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5714}
5715
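/*
 * Illustrative sketch: the selection above prefers a call-volatile register
 * when one is free and otherwise falls back to whatever remains.  The same
 * "prefer the subset if it is non-empty" idiom in isolation (standalone,
 * made-up name; __builtin_ctz stands in for ASMBitFirstSetU32(x) - 1):
 *
 *      #include <stdint.h>
 *
 *      static unsigned examplePickReg(uint32_t fFree, uint32_t fPreferred)
 *      {
 *          uint32_t const fPick = (fFree & fPreferred) ? (fFree & fPreferred) : fFree;
 *          return (unsigned)__builtin_ctz(fPick);  // lowest set bit; fPick != 0 assumed
 *      }
 */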
5716
5717/**
5718 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5719 * registers.
5720 *
5721 * @returns The host register number; throws VBox status code on failure,
5722 * so no need to check the return value.
5723 * @param pReNative The native recompile state.
5724 * @param poff Pointer to the variable with the code buffer position.
5725 * This will be updated if we need to move a variable from
5726 * register to stack in order to satisfy the request.
5727 * @param fRegMask Mask of acceptable registers.
5728 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5729 * registers (@c true, default) or the other way around
5730 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5731 */
5732DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5733 bool fPreferVolatile /*= true*/)
5734{
5735 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5736 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5737
5738 /*
5739 * Try find a completely unused register, preferably a call-volatile one.
5740 */
5741 uint8_t idxSimdReg;
5742 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5743 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5744 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5745 & fRegMask;
5746 if (fRegs)
5747 {
5748 if (fPreferVolatile)
5749 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5750 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5751 else
5752 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5753 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5754 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5755 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5756 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5757 }
5758 else
5759 {
5760 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5761 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5762 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5763 }
5764
5765 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5766 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5767}
5768
5769
5770static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdRegDst,
5771 uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5772{
5773 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5774 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == enmLoadSzDst
5775 || pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5776 {
5777# ifdef RT_ARCH_ARM64
5778 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5779 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5780# endif
5781
5782 switch (enmLoadSzDst)
5783 {
5784 case kIemNativeGstSimdRegLdStSz_256:
5785 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5786 break;
5787 case kIemNativeGstSimdRegLdStSz_Low128:
5788 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5789 break;
5790 case kIemNativeGstSimdRegLdStSz_High128:
5791 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst + 1, idxHstSimdRegSrc + 1);
5792 break;
5793 default:
5794 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5795 }
5796
5797 pReNative->Core.aHstSimdRegs[idxHstSimdRegDst].enmLoaded = enmLoadSzDst;
5798 }
5799 else
5800 {
5801 /* Complicated stuff where the source is currently missing something, later. */
5802 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5803 }
5804
5805 return off;
5806}
5807
5808
5809/**
5810 * Allocates a temporary host SIMD register for keeping a guest
5811 * SIMD register value.
5812 *
5813 * Since we may already have a register holding the guest register value,
5814 * code will be emitted to do the loading if that's not the case. Code may also
5815 * be emitted if we have to free up a register to satisfy the request.
5816 *
5817 * @returns The host register number; throws VBox status code on failure, so no
5818 * need to check the return value.
5819 * @param pReNative The native recompile state.
5820 * @param poff Pointer to the variable with the code buffer
5821 * position. This will be updated if we need to move a
5822 * variable from register to stack in order to satisfy
5823 * the request.
5824 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5825 * @param enmIntendedUse How the caller will be using the host register.
5826 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5827 * register is okay (default). The ASSUMPTION here is
5828 * that the caller has already flushed all volatile
5829 * registers, so this is only applied if we allocate a
5830 * new register.
5831 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5832 */
5833DECL_HIDDEN_THROW(uint8_t)
5834iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5835 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5836 bool fNoVolatileRegs /*= false*/)
5837{
5838 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5839#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5840 AssertMsg( pReNative->idxCurCall == 0
5841 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5842 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5843 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5844 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5845 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5846 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5847#endif
5848#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5849 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5850#endif
5851 uint32_t const fRegMask = !fNoVolatileRegs
5852 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5853 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5854
5855 /*
5856 * First check if the guest register value is already in a host register.
5857 */
5858 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5859 {
5860 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5861 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5862 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5863 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5864
5865 /* It's not supposed to be allocated... */
5866 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5867 {
5868 /*
5869 * If the register will trash the guest shadow copy, try find a
5870 * completely unused register we can use instead. If that fails,
5871 * we need to disassociate the host reg from the guest reg.
5872 */
5873 /** @todo would be nice to know if preserving the register is in any way helpful. */
5874            /* If the purpose is calculations, try to duplicate the register value as
5875               we'll be clobbering the shadow. */
5876 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5877 && ( ~pReNative->Core.bmHstSimdRegs
5878 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5879 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5880 {
5881 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5882
5883 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5884
5885 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5886 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5887 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5888 idxSimdReg = idxRegNew;
5889 }
5890 /* If the current register matches the restrictions, go ahead and allocate
5891 it for the caller. */
5892 else if (fRegMask & RT_BIT_32(idxSimdReg))
5893 {
5894 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5895 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5896 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5897 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5898 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5899 else
5900 {
5901 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5902 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5903 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5904 }
5905 }
5906 /* Otherwise, allocate a register that satisfies the caller and transfer
5907               the shadowing if compatible with the intended use.  (This basically
5908               means the caller wants a non-volatile register (RSP push/pop scenario).) */
5909 else
5910 {
5911 Assert(fNoVolatileRegs);
5912 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5913 !fNoVolatileRegs
5914 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5915 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5916 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5917 {
5918 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5919                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5920 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5921 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5922 }
5923 else
5924 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5925 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5926 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5927 idxSimdReg = idxRegNew;
5928 }
5929 }
5930 else
5931 {
5932 /*
5933 * Oops. Shadowed guest register already allocated!
5934 *
5935 * Allocate a new register, copy the value and, if updating, the
5936 * guest shadow copy assignment to the new register.
5937 */
5938 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5939 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5940 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5941 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5942
5943 /** @todo share register for readonly access. */
5944 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5945 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5946
5947 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5948 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, idxRegNew, idxSimdReg, enmLoadSz);
5949 else
5950 {
5951                /** @todo It is a bit unsafe to mark the register as loaded already even though nothing has been written to it yet. */
5952 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5953 }
5954
5955 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5956 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5957 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5958 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5959 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5960 else
5961 {
5962 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5963 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5964 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5965 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5966 }
5967 idxSimdReg = idxRegNew;
5968 }
5969 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5970
5971#ifdef VBOX_STRICT
5972 /* Strict builds: Check that the value is correct. */
5973 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5974#endif
5975
5976 return idxSimdReg;
5977 }
5978
5979    /*
5980     * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5981     */
5982 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5983
5984 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5985 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5986 else
5987 {
5988        /** @todo It is a bit unsafe to mark the register as loaded already even though nothing has been written to it yet. */
5989 pReNative->Core.aHstSimdRegs[idxRegNew].enmLoaded = enmLoadSz;
5990 }
5991
5992 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5993 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5994
5995    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5996 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5997
5998 return idxRegNew;
5999}
6000
6001#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6002
6003
6004
6005/*********************************************************************************************************************************
6006* Code emitters for flushing pending guest register writes and sanity checks *
6007*********************************************************************************************************************************/
6008
6009/**
6010 * Flushes delayed write of a specific guest register.
6011 *
6012 * This must be called prior to calling CImpl functions and any helpers that use
6013 * the guest state (like raising exceptions) and such.
6014 *
6015 * This optimization has not yet been implemented. The first target would be
6016 * RIP updates, since these are the most common ones.
6017 */
6018DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6019 IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
6020{
6021#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6022    /* If it ever becomes possible to reference the PC register through this path, the delayed PC writeback needs to be done here first. */
6023#endif
6024
6025#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6026 if ( enmClass == kIemNativeGstRegRef_XReg
6027 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
6028 {
6029 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
6030 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now, that the referenced register doesn't change). */
6031 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
6032
6033 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6034 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
6035 }
6036#endif
6037 RT_NOREF(pReNative, enmClass, idxReg);
6038 return off;
6039}
6040
6041
6042/**
6043 * Flushes any delayed guest register writes.
6044 *
6045 * This must be called prior to calling CImpl functions and any helpers that use
6046 * the guest state (like raising exceptions) and such.
6047 *
6048 * This optimization has not yet been implemented. The first target would be
6049 * RIP updates, since these are the most common ones.
6050 */
6051DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushPendingWrites(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept /*= 0*/,
6052 bool fFlushShadows /*= true*/)
6053{
6054#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6055    if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
6056 off = iemNativeEmitPcWriteback(pReNative, off);
6057#else
6058 RT_NOREF(pReNative, fGstShwExcept);
6059#endif
6060
6061#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6062 /** @todo This doesn't mix well with fGstShwExcept but we ignore this for now and just flush everything. */
6063 for (uint8_t idxGstSimdReg = 0; idxGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo); idxGstSimdReg++)
6064 {
6065 Assert( (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg)
6066 || !IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg)));
6067
6068 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg))
6069 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
6070
6071 if ( fFlushShadows
6072 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxGstSimdReg))
6073 {
6074 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxGstSimdReg];
6075
6076 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
6077 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg)));
6078 }
6079 }
6080#else
6081 RT_NOREF(pReNative, fGstShwExcept, fFlushShadows);
6082#endif
6083
6084 return off;
6085}
6086
6087
6088#ifdef VBOX_STRICT
6089/**
6090 * Does internal register allocator sanity checks.
6091 */
6092static void iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
6093{
6094 /*
6095 * Iterate host registers building a guest shadowing set.
6096 */
6097 uint64_t bmGstRegShadows = 0;
6098 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
6099 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
6100 while (bmHstRegsWithGstShadow)
6101 {
6102 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
6103 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
6104 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
6105
6106 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
6107 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
6108 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
6109 bmGstRegShadows |= fThisGstRegShadows;
6110 while (fThisGstRegShadows)
6111 {
6112 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
6113 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
6114 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
6115 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
6116 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
6117 }
6118 }
6119 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
6120 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
6121 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
6122
6123 /*
6124 * Now the other way around, checking the guest to host index array.
6125 */
6126 bmHstRegsWithGstShadow = 0;
6127 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
6128 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
6129 while (bmGstRegShadows)
6130 {
6131 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
6132 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
6133 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
6134
6135 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
6136 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
6137 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
6138 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
6139 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
6140 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
6141 }
6142 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
6143 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
6144 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
6145}
6146#endif
6147
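/*
 * Illustrative sketch: the invariant checked above is that the host->guest
 * and guest->host views of the shadowing agree.  A minimal standalone analog
 * (the parameters stand in for the recompiler's Core fields; assert replaces
 * AssertMsg):
 *
 *      #include <assert.h>
 *      #include <stdint.h>
 *
 *      static void exampleAssertShadowSanity(uint32_t bmHostWithShadow, uint64_t const *pafHostShadows,
 *                                            uint64_t bmGstShadows, uint8_t const *paidxHost)
 *      {
 *          // Host -> guest: every flagged host register contributes a non-empty guest mask.
 *          uint64_t bmGstFromHost = 0;
 *          for (unsigned idxHost = 0; idxHost < 32; idxHost++)
 *              if (bmHostWithShadow & (UINT32_C(1) << idxHost))
 *              {
 *                  assert(pafHostShadows[idxHost] != 0);
 *                  bmGstFromHost |= pafHostShadows[idxHost];
 *              }
 *          assert(bmGstFromHost == bmGstShadows);
 *
 *          // Guest -> host: each shadowed guest register points at a host register that lists it.
 *          for (unsigned idxGst = 0; idxGst < 64; idxGst++)
 *              if (bmGstShadows & (UINT64_C(1) << idxGst))
 *                  assert(pafHostShadows[paidxHost[idxGst]] & (UINT64_C(1) << idxGst));
 *      }
 */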
6148
6149/*********************************************************************************************************************************
6150* Code Emitters (larger snippets) *
6151*********************************************************************************************************************************/
6152
6153/**
6154 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6155 * extending to 64-bit width.
6156 *
6157 * @returns New code buffer offset on success, UINT32_MAX on failure.
6158 * @param pReNative The native recompile state.
6159 * @param off The current code buffer position.
6160 * @param idxHstReg The host register to load the guest register value into.
6161 * @param enmGstReg The guest register to load.
6162 *
6163 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
6164 * that is something the caller needs to do if applicable.
6165 */
6166DECL_HIDDEN_THROW(uint32_t)
6167iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6168{
6169 Assert((unsigned)enmGstReg < RT_ELEMENTS(g_aGstShadowInfo));
6170 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6171
6172 switch (g_aGstShadowInfo[enmGstReg].cb)
6173 {
6174 case sizeof(uint64_t):
6175 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6176 case sizeof(uint32_t):
6177 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6178 case sizeof(uint16_t):
6179 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6180#if 0 /* not present in the table. */
6181 case sizeof(uint8_t):
6182 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6183#endif
6184 default:
6185 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6186 }
6187}
6188
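/*
 * Illustrative sketch: the emitter above dispatches on the size of the shadow
 * copy in CPUMCTX and always zero-extends the value to 64 bits.  The same idea
 * as a plain C helper over a byte buffer (standalone; memcpy stands in for the
 * emitted load instruction):
 *
 *      #include <stdint.h>
 *      #include <string.h>
 *
 *      static uint64_t exampleLoadZeroExtended(uint8_t const *pbCtx, uint32_t off, uint32_t cb)
 *      {
 *          uint64_t uValue = 0;   // bits above cb*8 stay zero
 *          switch (cb)
 *          {
 *              case 8: memcpy(&uValue, &pbCtx[off], 8); break;
 *              case 4: { uint32_t u32; memcpy(&u32, &pbCtx[off], 4); uValue = u32; break; }
 *              case 2: { uint16_t u16; memcpy(&u16, &pbCtx[off], 2); uValue = u16; break; }
 *              default: break;    // other sizes not present in the table
 *          }
 *          return uValue;
 *      }
 */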
6189
6190#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6191/**
6192 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6193 *
6194 * @returns New code buffer offset on success, UINT32_MAX on failure.
6195 * @param pReNative The recompiler state.
6196 * @param off The current code buffer position.
6197 * @param idxHstSimdReg The host register to load the guest register value into.
6198 * @param enmGstSimdReg The guest register to load.
6199 * @param enmLoadSz The load size of the register.
6200 *
6201 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
6202 * that is something the caller needs to do if applicable.
6203 */
6204DECL_HIDDEN_THROW(uint32_t)
6205iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6206 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6207{
6208 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6209
6210 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
6211 switch (enmLoadSz)
6212 {
6213 case kIemNativeGstSimdRegLdStSz_256:
6214 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6215 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6216 case kIemNativeGstSimdRegLdStSz_Low128:
6217 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6218 case kIemNativeGstSimdRegLdStSz_High128:
6219 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6220 default:
6221 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6222 }
6223}
6224#endif
6225
6226#ifdef VBOX_STRICT
6227/**
6228 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6229 *
6230 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6231 * Trashes EFLAGS on AMD64.
6232 */
6233static uint32_t
6234iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6235{
6236# ifdef RT_ARCH_AMD64
6237 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6238
6239 /* rol reg64, 32 */
6240 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6241 pbCodeBuf[off++] = 0xc1;
6242 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6243 pbCodeBuf[off++] = 32;
6244
6245 /* test reg32, ffffffffh */
6246 if (idxReg >= 8)
6247 pbCodeBuf[off++] = X86_OP_REX_B;
6248 pbCodeBuf[off++] = 0xf7;
6249 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6250 pbCodeBuf[off++] = 0xff;
6251 pbCodeBuf[off++] = 0xff;
6252 pbCodeBuf[off++] = 0xff;
6253 pbCodeBuf[off++] = 0xff;
6254
6255 /* je/jz +1 */
6256 pbCodeBuf[off++] = 0x74;
6257 pbCodeBuf[off++] = 0x01;
6258
6259 /* int3 */
6260 pbCodeBuf[off++] = 0xcc;
6261
6262 /* rol reg64, 32 */
6263 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6264 pbCodeBuf[off++] = 0xc1;
6265 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6266 pbCodeBuf[off++] = 32;
6267
6268# elif defined(RT_ARCH_ARM64)
6269 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6270 /* lsr tmp0, reg64, #32 */
6271 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6272 /* cbz tmp0, +1 */
6273 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6274 /* brk #0x1100 */
6275 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6276
6277# else
6278# error "Port me!"
6279# endif
6280 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6281 return off;
6282}
6283#endif /* VBOX_STRICT */
6284
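/*
 * Illustrative sketch: what the rol/test/rol (AMD64) and lsr/cbz (ARM64)
 * sequences above boil down to, in plain C (standalone; assert stands in
 * for the emitted breakpoint instruction):
 *
 *      #include <assert.h>
 *      #include <stdint.h>
 *
 *      static void exampleCheckTop32BitsClear(uint64_t uReg)
 *      {
 *          assert((uReg >> 32) == 0);  // trap if any of bits 63:32 are set
 *      }
 */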
6285
6286#ifdef VBOX_STRICT
6287/**
6288 * Emitting code that checks that the content of register @a idxReg is the same
6289 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6290 * instruction if that's not the case.
6291 *
6292 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6293 * Trashes EFLAGS on AMD64.
6294 */
6295static uint32_t
6296iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6297{
6298# ifdef RT_ARCH_AMD64
6299 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6300
6301 /* cmp reg, [mem] */
6302 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6303 {
6304 if (idxReg >= 8)
6305 pbCodeBuf[off++] = X86_OP_REX_R;
6306 pbCodeBuf[off++] = 0x38;
6307 }
6308 else
6309 {
6310 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6311 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6312 else
6313 {
6314 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6315 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6316 else
6317 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6318 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6319 if (idxReg >= 8)
6320 pbCodeBuf[off++] = X86_OP_REX_R;
6321 }
6322 pbCodeBuf[off++] = 0x39;
6323 }
6324 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6325
6326 /* je/jz +1 */
6327 pbCodeBuf[off++] = 0x74;
6328 pbCodeBuf[off++] = 0x01;
6329
6330 /* int3 */
6331 pbCodeBuf[off++] = 0xcc;
6332
6333 /* For values smaller than the register size, we must check that the rest
6334 of the register is all zeros. */
6335 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6336 {
6337 /* test reg64, imm32 */
6338 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6339 pbCodeBuf[off++] = 0xf7;
6340 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6341 pbCodeBuf[off++] = 0;
6342 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6343 pbCodeBuf[off++] = 0xff;
6344 pbCodeBuf[off++] = 0xff;
6345
6346 /* je/jz +1 */
6347 pbCodeBuf[off++] = 0x74;
6348 pbCodeBuf[off++] = 0x01;
6349
6350 /* int3 */
6351 pbCodeBuf[off++] = 0xcc;
6352 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6353 }
6354 else
6355 {
6356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6357 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6358 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6359 }
6360
6361# elif defined(RT_ARCH_ARM64)
6362 /* mov TMP0, [gstreg] */
6363 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6364
6365 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6366 /* sub tmp0, tmp0, idxReg */
6367 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6368 /* cbz tmp0, +1 */
6369 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6370 /* brk #0x1000+enmGstReg */
6371 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6372 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6373
6374# else
6375# error "Port me!"
6376# endif
6377 return off;
6378}
6379
6380
6381# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6382/**
6383 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6384 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6385 * instruction if that's not the case.
6386 *
6387 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6388 * Trashes EFLAGS on AMD64.
6389 */
6390static uint32_t
6391iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg, IEMNATIVEGSTSIMDREG enmGstSimdReg,
6392 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6393{
6394# ifdef RT_ARCH_AMD64
6395 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128); /** @todo 256-bit variant. */
6396
6397 /* movdqa vectmp0, idxSimdReg */
6398 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6399
6400 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6401
6402 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6403 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6404 if (idxSimdReg >= 8)
6405 pbCodeBuf[off++] = X86_OP_REX_R;
6406 pbCodeBuf[off++] = 0x0f;
6407 pbCodeBuf[off++] = 0x38;
6408 pbCodeBuf[off++] = 0x29;
6409 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6410
6411 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6412 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6413 pbCodeBuf[off++] = X86_OP_REX_W
6414 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6415 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6416 pbCodeBuf[off++] = 0x0f;
6417 pbCodeBuf[off++] = 0x3a;
6418 pbCodeBuf[off++] = 0x16;
6419 pbCodeBuf[off++] = 0xeb;
6420 pbCodeBuf[off++] = 0x00;
6421
6422 /* test tmp0, 0xffffffff. */
6423 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6424 pbCodeBuf[off++] = 0xf7;
6425 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6426 pbCodeBuf[off++] = 0xff;
6427 pbCodeBuf[off++] = 0xff;
6428 pbCodeBuf[off++] = 0xff;
6429 pbCodeBuf[off++] = 0xff;
6430
6431 /* je/jz +1 */
6432 pbCodeBuf[off++] = 0x74;
6433 pbCodeBuf[off++] = 0x01;
6434
6435 /* int3 */
6436 pbCodeBuf[off++] = 0xcc;
6437
6438 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6439 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6440 pbCodeBuf[off++] = X86_OP_REX_W
6441 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_R)
6442 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6443 pbCodeBuf[off++] = 0x0f;
6444 pbCodeBuf[off++] = 0x3a;
6445 pbCodeBuf[off++] = 0x16;
6446 pbCodeBuf[off++] = 0xeb;
6447 pbCodeBuf[off++] = 0x01;
6448
6449 /* test tmp0, 0xffffffff. */
6450 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6451 pbCodeBuf[off++] = 0xf7;
6452 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, IEMNATIVE_REG_FIXED_TMP0 & 7);
6453 pbCodeBuf[off++] = 0xff;
6454 pbCodeBuf[off++] = 0xff;
6455 pbCodeBuf[off++] = 0xff;
6456 pbCodeBuf[off++] = 0xff;
6457
6458 /* je/jz +1 */
6459 pbCodeBuf[off++] = 0x74;
6460 pbCodeBuf[off++] = 0x01;
6461
6462 /* int3 */
6463 pbCodeBuf[off++] = 0xcc;
6464
6465# elif defined(RT_ARCH_ARM64)
6466 /* mov vectmp0, [gstreg] */
6467 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6468
6469 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6470 {
6471 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6472 /* eor vectmp0, vectmp0, idxSimdReg */
6473 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6474 /* cnt vectmp0, vectmp0, #0*/
6475 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0);
6476 /* umov tmp0, vectmp0.D[0] */
6477 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6478 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6479 /* cbz tmp0, +1 */
6480 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6481 /* brk #0x1000+enmGstReg */
6482 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6483 }
6484
6485 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6486 {
6487 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6488 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6489 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg);
6490 /* cnt vectmp0 + 1, vectmp0 + 1, #0*/
6491 pu32CodeBuf[off++] = Armv8A64MkVecInstrCnt(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1);
6492 /* umov tmp0, (vectmp0 + 1).D[0] */
6493 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6494 0 /*idxElem*/, kArmv8InstrUmovSz_U64);
6495 /* cbz tmp0, +1 */
6496 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6497 /* brk #0x1000+enmGstReg */
6498 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6499 }
6500
6501# else
6502# error "Port me!"
6503# endif
6504
6505 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6506 return off;
6507}
6508# endif
6509#endif /* VBOX_STRICT */
6510
6511
6512#ifdef VBOX_STRICT
6513/**
6514 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6515 * important bits.
6516 *
6517 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6518 * Trashes EFLAGS on AMD64.
6519 */
6520static uint32_t
6521iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6522{
6523 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6524 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6525 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6526 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6527
6528#ifdef RT_ARCH_AMD64
6529 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6530
6531 /* je/jz +1 */
6532 pbCodeBuf[off++] = 0x74;
6533 pbCodeBuf[off++] = 0x01;
6534
6535 /* int3 */
6536 pbCodeBuf[off++] = 0xcc;
6537
6538# elif defined(RT_ARCH_ARM64)
6539 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6540
6541 /* b.eq +1 */
6542 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6543 /* brk #0x2000 */
6544 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6545
6546# else
6547# error "Port me!"
6548# endif
6549 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6550
6551 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6552 return off;
6553}
6554#endif /* VBOX_STRICT */
6555
6556
6557/**
6558 * Emits code for checking the return code of a call and rcPassUp, returning
6559 * from the code if either is non-zero.
6560 */
6561DECL_HIDDEN_THROW(uint32_t)
6562iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6563{
6564#ifdef RT_ARCH_AMD64
6565 /*
6566 * AMD64: eax = call status code.
6567 */
6568
6569 /* edx = rcPassUp */
6570 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6571# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6572 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6573# endif
6574
6575 /* edx = eax | rcPassUp */
6576 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6577 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6578 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6579 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6580
6581 /* Jump to non-zero status return path. */
6582 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6583
6584 /* done. */
6585
6586#elif RT_ARCH_ARM64
6587 /*
6588 * ARM64: w0 = call status code.
6589 */
6590# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6591 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6592# endif
6593 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6594
6595 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6596
6597 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6598
6599 uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6600 iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_RelImm19At5);
6601 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(true /*fJmpIfNotZero*/, 0, ARMV8_A64_REG_X4, false /*f64Bit*/);
6602
6603#else
6604# error "port me"
6605#endif
6606 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6607 RT_NOREF_PV(idxInstr);
6608 return off;
6609}
6610
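/*
 * Illustrative sketch: the code emitted above folds the two checks (call
 * status and rcPassUp) into a single OR plus one conditional branch.  The
 * equivalent control flow in plain C (standalone; the names and the return
 * handling are illustrative only):
 *
 *      #include <stdint.h>
 *
 *      static int exampleCheckCallRet(int rcCall, int rcPassUp)
 *      {
 *          if ((uint32_t)rcCall | (uint32_t)rcPassUp)      // one branch covers both values
 *              return rcCall != 0 ? rcCall : rcPassUp;     // take the non-zero return path
 *          return 0;                                       // all good, continue
 *      }
 */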
6611
6612/**
6613 * Emits code to check if the content of @a idxAddrReg is a canonical address,
6614 * raising a \#GP(0) if it isn't.
6615 *
6616 * @returns New code buffer offset, UINT32_MAX on failure.
6617 * @param pReNative The native recompile state.
6618 * @param off The code buffer offset.
6619 * @param idxAddrReg The host register with the address to check.
6620 * @param idxInstr The current instruction.
6621 */
6622DECL_HIDDEN_THROW(uint32_t)
6623iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
6624{
6625 /*
6626 * Make sure we don't have any outstanding guest register writes as we may
6627     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
6628 */
6629 off = iemNativeRegFlushPendingWrites(pReNative, off);
6630
6631#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6632 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6633#else
6634 RT_NOREF(idxInstr);
6635#endif
6636
6637#ifdef RT_ARCH_AMD64
6638 /*
6639 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
6640 * return raisexcpt();
6641     * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
6642 */
6643 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6644
6645 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
6646 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
6647 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
6648 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
6649 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6650
6651 iemNativeRegFreeTmp(pReNative, iTmpReg);
6652
6653#elif defined(RT_ARCH_ARM64)
6654 /*
6655 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
6656 * return raisexcpt();
6657 * ----
6658 * mov x1, 0x800000000000
6659 * add x1, x0, x1
6660 * cmp xzr, x1, lsr 48
6661 * b.ne .Lraisexcpt
6662 */
6663 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
6664
6665 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
6666 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
6667 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
6668 off = iemNativeEmitJnzToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6669
6670 iemNativeRegFreeTmp(pReNative, iTmpReg);
6671
6672#else
6673# error "Port me"
6674#endif
6675 return off;
6676}
6677
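/*
 * Illustrative sketch: both variants above implement the same test - an
 * address is canonical when bits 63:47 are copies of bit 47, and adding 2^47
 * folds the two canonical ranges into [0, 2^48).  Standalone version showing
 * that the AMD64 and ARM64 formulations agree (assert stands in for raising
 * #GP(0)):
 *
 *      #include <assert.h>
 *      #include <stdbool.h>
 *      #include <stdint.h>
 *
 *      static bool exampleIsCanonical(uint64_t uAddr)
 *      {
 *          // ARM64 style: 64-bit add, then look at bits 63:48.
 *          bool const f1 = ((uAddr + UINT64_C(0x800000000000)) >> 48) == 0;
 *          // AMD64 style: work on the high dword only to avoid a 64-bit immediate.
 *          bool const f2 = (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
 *          assert(f1 == f2);
 *          return f1;
 *      }
 */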
6678
6679/**
6680 * Emits code to check that the content of @a idxAddrReg is within the limit
6681 * of CS, raising a \#GP(0) if it isn't.
6682 *
6683 * @returns New code buffer offset; throws VBox status code on error.
6684 * @param pReNative The native recompile state.
6685 * @param off The code buffer offset.
6686 * @param idxAddrReg The host register (32-bit) with the address to
6687 * check.
6688 * @param idxInstr The current instruction.
6689 */
6690DECL_HIDDEN_THROW(uint32_t)
6691iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6692 uint8_t idxAddrReg, uint8_t idxInstr)
6693{
6694 /*
6695 * Make sure we don't have any outstanding guest register writes as we may
6696 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
6697     * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
6698 off = iemNativeRegFlushPendingWrites(pReNative, off);
6699
6700#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6701 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6702#else
6703 RT_NOREF(idxInstr);
6704#endif
6705
6706 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
6707 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
6708 kIemNativeGstRegUse_ReadOnly);
6709
6710 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
6711 off = iemNativeEmitJaToNewLabel(pReNative, off, kIemNativeLabelType_RaiseGp0);
6712
6713 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
6714 return off;
6715}
6716
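/*
 * Illustrative sketch: the check above is an unsigned compare of the 32-bit
 * address against the CS limit followed by a "jump if above" to the #GP(0)
 * path; the limit itself is inclusive.  In plain C (standalone; assert stands
 * in for raising the exception):
 *
 *      #include <assert.h>
 *      #include <stdint.h>
 *
 *      static void exampleCheckAgainstCsLimit(uint32_t uAddr, uint32_t cbCsLimit)
 *      {
 *          assert(uAddr <= cbCsLimit);   // 'ja' taken <=> uAddr > cbCsLimit <=> #GP(0)
 *      }
 */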
6717
6718/**
6719 * Converts IEM_CIMPL_F_XXX flags into a guest register shadow copy flush mask.
6720 *
6721 * @returns The flush mask.
6722 * @param fCImpl The IEM_CIMPL_F_XXX flags.
6723 * @param fGstShwFlush The starting flush mask.
6724 */
6725DECL_FORCE_INLINE(uint64_t) iemNativeCImplFlagsToGuestShadowFlushMask(uint32_t fCImpl, uint64_t fGstShwFlush)
6726{
6727 if (fCImpl & IEM_CIMPL_F_BRANCH_FAR)
6728 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_CS)
6729 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_CS)
6730 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS);
6731 if (fCImpl & IEM_CIMPL_F_BRANCH_STACK_FAR)
6732 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP)
6733 | RT_BIT_64(kIemNativeGstReg_SegSelFirst + X86_SREG_SS)
6734 | RT_BIT_64(kIemNativeGstReg_SegBaseFirst + X86_SREG_SS)
6735 | RT_BIT_64(kIemNativeGstReg_SegLimitFirst + X86_SREG_SS);
6736 else if (fCImpl & IEM_CIMPL_F_BRANCH_STACK)
6737 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_GprFirst + X86_GREG_xSP);
6738 if (fCImpl & (IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_STATUS_FLAGS | IEM_CIMPL_F_INHIBIT_SHADOW))
6739 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
6740 return fGstShwFlush;
6741}
6742
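/*
 * Illustrative sketch: the helper above just ORs extra guest register bits
 * into an existing flush mask depending on the IEM_CIMPL_F_XXX flags.  A
 * standalone analog with made-up flag values and bit positions (not the real
 * enum values):
 *
 *      #include <stdint.h>
 *
 *      #define EXAMPLE_BIT64(a_iBit)  (UINT64_C(1) << (a_iBit))
 *
 *      static uint64_t exampleFlagsToFlushMask(uint32_t fFlags, uint64_t fMask)
 *      {
 *          if (fFlags & UINT32_C(0x01))   // far branch
 *              fMask |= EXAMPLE_BIT64(10) | EXAMPLE_BIT64(11) | EXAMPLE_BIT64(12); // CS sel/base/limit
 *          if (fFlags & UINT32_C(0x02))   // touches EFLAGS
 *              fMask |= EXAMPLE_BIT64(13);
 *          return fMask;
 *      }
 */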
6743
6744/**
6745 * Emits a call to a CImpl function or something similar.
6746 */
6747DECL_HIDDEN_THROW(uint32_t)
6748iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6749 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6750{
6751 /* Writeback everything. */
6752 off = iemNativeRegFlushPendingWrites(pReNative, off);
6753
6754 /*
6755     * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6756 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6757 */
6758 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6759 fGstShwFlush
6760 | RT_BIT_64(kIemNativeGstReg_Pc)
6761 | RT_BIT_64(kIemNativeGstReg_EFlags));
6762 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6763
6764 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6765
6766 /*
6767 * Load the parameters.
6768 */
6769#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6770    /* Special case: the hidden VBOXSTRICTRC pointer. */
6771 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6772 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6773 if (cAddParams > 0)
6774 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6775 if (cAddParams > 1)
6776 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6777 if (cAddParams > 2)
6778 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6779 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6780
6781#else
6782 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6783 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6784 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6785 if (cAddParams > 0)
6786 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6787 if (cAddParams > 1)
6788 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6789 if (cAddParams > 2)
6790# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6791 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6792# else
6793 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6794# endif
6795#endif
6796
6797 /*
6798 * Make the call.
6799 */
6800 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6801
6802#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6803 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6804#endif
6805
6806 /*
6807 * Check the status code.
6808 */
6809 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6810}
6811
6812
6813/**
6814 * Emits a call to a threaded worker function.
6815 */
6816DECL_HIDDEN_THROW(uint32_t)
6817iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6818{
6819 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6820 off = iemNativeRegFlushPendingWrites(pReNative, off);
6821
6822 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6823 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6824
6825#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6826 /* The threaded function may throw / long jmp, so set current instruction
6827 number if we're counting. */
6828 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6829#endif
6830
6831 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6832
6833#ifdef RT_ARCH_AMD64
6834 /* Load the parameters and emit the call. */
6835# ifdef RT_OS_WINDOWS
6836# ifndef VBOXSTRICTRC_STRICT_ENABLED
6837 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6838 if (cParams > 0)
6839 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6840 if (cParams > 1)
6841 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6842 if (cParams > 2)
6843 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6844# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6845 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6846 if (cParams > 0)
6847 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6848 if (cParams > 1)
6849 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6850 if (cParams > 2)
6851 {
6852 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6853 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6854 }
6855 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6856# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6857# else
6858 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6859 if (cParams > 0)
6860 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6861 if (cParams > 1)
6862 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6863 if (cParams > 2)
6864 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6865# endif
6866
6867 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6868
6869# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6870 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6871# endif
6872
6873#elif RT_ARCH_ARM64
6874 /*
6875 * ARM64:
6876 */
6877 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6878 if (cParams > 0)
6879 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6880 if (cParams > 1)
6881 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6882 if (cParams > 2)
6883 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6884
6885 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6886
6887#else
6888# error "port me"
6889#endif
6890
6891 /*
6892 * Check the status code.
6893 */
6894 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6895
6896 return off;
6897}
6898
6899#ifdef VBOX_WITH_STATISTICS
6900/**
6901 * Emits code to update the threaded call statistics.
6902 */
6903DECL_INLINE_THROW(uint32_t)
6904iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6905{
6906 /*
6907 * Update threaded function stats.
6908 */
6909 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6910 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6911# if defined(RT_ARCH_ARM64)
6912 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6913 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6914 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6915 iemNativeRegFreeTmp(pReNative, idxTmp1);
6916 iemNativeRegFreeTmp(pReNative, idxTmp2);
6917# else
6918 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6919# endif
6920 return off;
6921}
6922#endif /* VBOX_WITH_STATISTICS */
6923
6924
6925/**
6926 * Emits the code at the CheckBranchMiss label.
6927 */
6928static uint32_t iemNativeEmitCheckBranchMiss(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6929{
6930 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_CheckBranchMiss);
6931 if (idxLabel != UINT32_MAX)
6932 {
6933 iemNativeLabelDefine(pReNative, idxLabel, off);
6934
6935 /* int iemNativeHlpCheckBranchMiss(PVMCPUCC pVCpu) */
6936 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6937 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpCheckBranchMiss);
6938
6939 /* jump back to the return sequence. */
6940 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6941 }
6942 return off;
6943}
6944
6945
6946/**
6947 * Emits the code at the NeedCsLimChecking label.
6948 */
6949static uint32_t iemNativeEmitNeedCsLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6950{
6951 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NeedCsLimChecking);
6952 if (idxLabel != UINT32_MAX)
6953 {
6954 iemNativeLabelDefine(pReNative, idxLabel, off);
6955
6956 /* int iemNativeHlpNeedCsLimChecking(PVMCPUCC pVCpu) */
6957 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6958 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpNeedCsLimChecking);
6959
6960 /* jump back to the return sequence. */
6961 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6962 }
6963 return off;
6964}
6965
6966
6967/**
6968 * Emits the code at the ObsoleteTb label.
6969 */
6970static uint32_t iemNativeEmitObsoleteTb(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6971{
6972 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ObsoleteTb);
6973 if (idxLabel != UINT32_MAX)
6974 {
6975 iemNativeLabelDefine(pReNative, idxLabel, off);
6976
6977 /* int iemNativeHlpObsoleteTb(PVMCPUCC pVCpu) */
6978 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6979 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpObsoleteTb);
6980
6981 /* jump back to the return sequence. */
6982 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6983 }
6984 return off;
6985}
6986
6987
6988/**
6989 * Emits the code at the RaiseGP0 label.
6990 */
6991static uint32_t iemNativeEmitRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6992{
6993 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseGp0);
6994 if (idxLabel != UINT32_MAX)
6995 {
6996 iemNativeLabelDefine(pReNative, idxLabel, off);
6997
6998 /* iemNativeHlpExecRaiseGp0(PVMCPUCC pVCpu) */
6999 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7000 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseGp0);
7001
7002 /* jump back to the return sequence. */
7003 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7004 }
7005 return off;
7006}
7007
7008
7009/**
7010 * Emits the code at the RaiseNm label.
7011 */
7012static uint32_t iemNativeEmitRaiseNm(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7013{
7014 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseNm);
7015 if (idxLabel != UINT32_MAX)
7016 {
7017 iemNativeLabelDefine(pReNative, idxLabel, off);
7018
7019 /* iemNativeHlpExecRaiseNm(PVMCPUCC pVCpu) */
7020 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7021 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseNm);
7022
7023 /* jump back to the return sequence. */
7024 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7025 }
7026 return off;
7027}
7028
7029
7030/**
7031 * Emits the code at the RaiseUd label.
7032 */
7033static uint32_t iemNativeEmitRaiseUd(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7034{
7035 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseUd);
7036 if (idxLabel != UINT32_MAX)
7037 {
7038 iemNativeLabelDefine(pReNative, idxLabel, off);
7039
7040 /* iemNativeHlpExecRaiseUd(PVMCPUCC pVCpu) */
7041 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7042 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseUd);
7043
7044 /* jump back to the return sequence. */
7045 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7046 }
7047 return off;
7048}
7049
7050
7051/**
7052 * Emits the code at the RaiseMf label.
7053 */
7054static uint32_t iemNativeEmitRaiseMf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7055{
7056 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseMf);
7057 if (idxLabel != UINT32_MAX)
7058 {
7059 iemNativeLabelDefine(pReNative, idxLabel, off);
7060
7061 /* iemNativeHlpExecRaiseMf(PVMCPUCC pVCpu) */
7062 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7063 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseMf);
7064
7065 /* jump back to the return sequence. */
7066 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7067 }
7068 return off;
7069}
7070
7071
7072/**
7073 * Emits the code at the RaiseXf label.
7074 */
7075static uint32_t iemNativeEmitRaiseXf(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7076{
7077 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_RaiseXf);
7078 if (idxLabel != UINT32_MAX)
7079 {
7080 iemNativeLabelDefine(pReNative, idxLabel, off);
7081
7082 /* iemNativeHlpExecRaiseXf(PVMCPUCC pVCpu) */
7083 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7084 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecRaiseXf);
7085
7086 /* jump back to the return sequence. */
7087 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7088 }
7089 return off;
7090}
7091
7092
7093/**
7094 * Emits the code at the ReturnWithFlags label (returns
7095 * VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
7096 */
7097static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7098{
7099 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
7100 if (idxLabel != UINT32_MAX)
7101 {
7102 iemNativeLabelDefine(pReNative, idxLabel, off);
7103
7104 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
7105
7106 /* jump back to the return sequence. */
7107 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7108 }
7109 return off;
7110}
7111
7112
7113/**
7114 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
7115 */
7116static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7117{
7118 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
7119 if (idxLabel != UINT32_MAX)
7120 {
7121 iemNativeLabelDefine(pReNative, idxLabel, off);
7122
7123 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
7124
7125 /* jump back to the return sequence. */
7126 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7127 }
7128 return off;
7129}
7130
7131
7132/**
7133 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
7134 */
7135static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
7136{
7137 /*
7138 * Generate the rc + rcPassUp fiddling code if needed.
7139 */
7140 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
7141 if (idxLabel != UINT32_MAX)
7142 {
7143 iemNativeLabelDefine(pReNative, idxLabel, off);
7144
7145 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
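        /* The status code currently sitting in the host return register must be moved to the
           2nd argument register, pVCpu to the 1st and, where available, the instruction number
           to the 3rd; the moves below are ordered so that nothing gets clobbered. */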
7146#ifdef RT_ARCH_AMD64
7147# ifdef RT_OS_WINDOWS
7148# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7149 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
7150# endif
7151 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
7152 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
7153# else
7154 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7155 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7156# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7157 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7158# endif
7159# endif
7160# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7161 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7162# endif
7163
7164#else
7165 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7166 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7167 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7168#endif
7169
7170 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7171 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
7172 }
7173 return off;
7174}
7175
7176
7177/**
7178 * Emits a standard epilog.
7179 */
7180static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
7181{
7182 *pidxReturnLabel = UINT32_MAX;
7183
7184 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
7185 off = iemNativeRegFlushPendingWrites(pReNative, off);
7186
7187 /*
7188 * Successful return, so clear the return register (eax, w0).
7189 */
 7190 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
7191
7192 /*
7193 * Define label for common return point.
7194 */
7195 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
7196 *pidxReturnLabel = idxReturn;
7197
7198 /*
7199 * Restore registers and return.
7200 */
7201#ifdef RT_ARCH_AMD64
7202 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7203
 7204 /* Reposition rsp at the r15 restore point. */
7205 pbCodeBuf[off++] = X86_OP_REX_W;
7206 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7207 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7208 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7209
7210 /* Pop non-volatile registers and return */
7211 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7212 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7213 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7214 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7215 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7216 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7217 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7218 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7219# ifdef RT_OS_WINDOWS
7220 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7221 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7222# endif
7223 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7224 pbCodeBuf[off++] = 0xc9; /* leave */
7225 pbCodeBuf[off++] = 0xc3; /* ret */
7226 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7227
7228#elif RT_ARCH_ARM64
7229 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7230
 7231 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
7232 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
7233 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7234 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7235 IEMNATIVE_FRAME_VAR_SIZE / 8);
7236 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7237 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7238 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7239 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7240 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7241 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7242 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7243 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7244 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7245 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7246 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7247 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7248
7249 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7250 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7251 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7252 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7253
7254 /* retab / ret */
7255# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7256 if (1)
7257 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7258 else
7259# endif
7260 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7261
7262#else
7263# error "port me"
7264#endif
7265 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7266
7267 return iemNativeEmitRcFiddling(pReNative, off, idxReturn);
7268}
7269
7270
7271/**
7272 * Emits a standard prolog.
7273 */
7274static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7275{
7276#ifdef RT_ARCH_AMD64
7277 /*
7278 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
7279 * reserving 64 bytes for stack variables plus 4 non-register argument
 7280 * slots. Fixed register assignment: xBX = pVCpu;
7281 *
7282 * Since we always do the same register spilling, we can use the same
7283 * unwind description for all the code.
7284 */
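    /* Roughly, the code emitted below looks like this (illustrative only; the authoritative
       frame layout is given by the IEMNATIVE_FRAME_XXX / IEMNATIVE_FP_OFF_XXX defines):
            push    rbp
            mov     rbp, rsp
            push    rbx
            mov     rbx, rcx/rdi            ; rbx = pVCpu (1st argument)
            push    rsi ; push rdi          ; Windows only
            push    r12 .. r15
            sub     rsp, ALIGN + VAR + (STACK_ARG + SHADOW_ARG) * 8 */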
7285 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7286 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
7287 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
7288 pbCodeBuf[off++] = 0x8b;
7289 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
7290 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
7291 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
7292# ifdef RT_OS_WINDOWS
7293 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
7294 pbCodeBuf[off++] = 0x8b;
7295 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
7296 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
7297 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
7298# else
7299 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
7300 pbCodeBuf[off++] = 0x8b;
7301 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
7302# endif
7303 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
7304 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
7305 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
7306 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
7307 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
7308 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
7309 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
7310 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
7311
7312# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7313 /* Save the frame pointer. */
7314 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
7315# endif
7316
7317 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
7318 X86_GREG_xSP,
7319 IEMNATIVE_FRAME_ALIGN_SIZE
7320 + IEMNATIVE_FRAME_VAR_SIZE
7321 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
7322 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
7323 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
7324 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
7325 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
7326
7327#elif RT_ARCH_ARM64
7328 /*
7329 * We set up a stack frame exactly like on x86, only we have to push the
 7330 * return address ourselves here. We save all non-volatile registers.
7331 */
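    /* Illustrative sketch of the frame set up below (the IEMNATIVE_FRAME_XXX defines are
       authoritative): x19..x28 plus BP and LR are saved in a 12 register block allocated on
       the stack, BP is then pointed at the saved BP/LR pair at the top of that block and the
       variable area is allocated below it; x28 ends up holding pVCpu and x27 pCpumCtx. */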
7332 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
7333
 7334# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've
 7335                      * been unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
 7336                      * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
 7337                      * in any way conditional, so we just emit these instructions now and hope for the best... */
7338 /* pacibsp */
7339 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
7340# endif
7341
7342 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
7343 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
7344 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7345 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7346 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
7347 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
7348 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7349 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7350 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7351 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7352 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7353 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7354 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7355 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7356 /* Save the BP and LR (ret address) registers at the top of the frame. */
7357 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7358 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7359 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7360 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7361 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7362 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7363
7364 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7365 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7366
7367 /* mov r28, r0 */
7368 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7369 /* mov r27, r1 */
7370 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7371
7372# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7373 /* Save the frame pointer. */
7374 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7375 ARMV8_A64_REG_X2);
7376# endif
7377
7378#else
7379# error "port me"
7380#endif
7381 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7382 return off;
7383}
7384
7385
7386
7387
7388/*********************************************************************************************************************************
7389* Emitters for IEM_MC_BEGIN and IEM_MC_END. *
7390*********************************************************************************************************************************/
7391
7392#define IEM_MC_BEGIN(a_cArgs, a_cLocals, a_fMcFlags, a_fCImplFlags) \
7393 { \
7394 Assert(pReNative->Core.bmVars == 0); \
7395 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
7396 Assert(pReNative->Core.bmStack == 0); \
7397 pReNative->fMc = (a_fMcFlags); \
7398 pReNative->fCImpl = (a_fCImplFlags); \
7399 pReNative->cArgs = ((a_cArgs) + iemNativeArgGetHiddenArgCount(pReNative))
7400
7401/** We have to get to the end in recompilation mode, as otherwise we won't
7402 * generate code for all the IEM_MC_IF_XXX branches. */
7403#define IEM_MC_END() \
7404 iemNativeVarFreeAll(pReNative); \
7405 } return off
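
/* Note: IEM_MC_BEGIN opens the block scope and seeds the argument/flag bookkeeping in
 *       pReNative, while IEM_MC_END frees all variables again and returns the current code
 *       buffer offset; together they bracket every recompiled MC block body. */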
7406
7407
7408
7409/*********************************************************************************************************************************
7410* Native Emitter Support. *
7411*********************************************************************************************************************************/
7412
7413
7414#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
7415
7416#define IEM_MC_NATIVE_ELSE() } else {
7417
7418#define IEM_MC_NATIVE_ENDIF() } ((void)0)
7419
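/* Note: Since RT_ARCH_VAL is a compile time constant, the IF/ELSE/ENDIF construct above
 *       boils down to a constant 'if' and the compiler discards the branch that does not
 *       apply to the host architecture being built. */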
7420
7421#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
7422 off = a_fnEmitter(pReNative, off)
7423
7424#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
7425 off = a_fnEmitter(pReNative, off, (a0))
7426
7427#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
7428 off = a_fnEmitter(pReNative, off, (a0), (a1))
7429
7430#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
7431 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
7432
7433#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
7434 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
7435
7436#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
7437 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
7438
7439#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
7440 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
7441
7442#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
7443 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
7444
7445#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
7446 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
7447
7448
7449
7450/*********************************************************************************************************************************
7451* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
7452*********************************************************************************************************************************/
7453
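/* Note: Each of these terminates recompilation of the current MC block: it clears fMc,
 *       records the CIMPL flags and emits a call to the C implementation with up to three
 *       constant arguments via the iemNativeEmitCImplCall0/1/2/3 wrappers. */
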
7454#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
7455 pReNative->fMc = 0; \
7456 pReNative->fCImpl = (a_fFlags); \
7457 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
7458
7459
7460#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
7461 pReNative->fMc = 0; \
7462 pReNative->fCImpl = (a_fFlags); \
7463 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
7464
7465DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7466 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7467 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
7468{
7469 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
7470}
7471
7472
7473#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
7474 pReNative->fMc = 0; \
7475 pReNative->fCImpl = (a_fFlags); \
7476 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7477 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
7478
7479DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7480 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7481 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
7482{
7483 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
7484}
7485
7486
7487#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
7488 pReNative->fMc = 0; \
7489 pReNative->fCImpl = (a_fFlags); \
7490 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
7491 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
7492
7493DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7494 uint8_t idxInstr, uint64_t a_fGstShwFlush,
7495 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
7496 uint64_t uArg2)
7497{
7498 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
7499}
7500
7501
7502
7503/*********************************************************************************************************************************
7504* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
7505*********************************************************************************************************************************/
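
/* Note: With IEMNATIVE_WITH_DELAYED_PC_UPDATING the advance is merely accumulated in
 *       pReNative->Core.offPc and only written back immediately when inside a conditional,
 *       otherwise a temporary PC register is allocated, advanced by the instruction length
 *       and stored straight back to CPUMCTX. */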
7506
7507/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
7508 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
7509DECL_INLINE_THROW(uint32_t)
7510iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7511{
7512 /*
 7513 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
 7514 * return with a special status code and make the execution loop deal with
 7515 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
 7516 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
 7517 * could continue w/o interruption, it probably will drop into the
 7518 * debugger, so it's not worth the effort of trying to service it here and
 7519 * we just lump it in with the handling of the others.
 7520 *
 7521 * To simplify the code and the register state management even more (wrt the
 7522 * immediate in the AND operation), we always update the flags and skip the
 7523 * conditional jump associated with the extra check.
7524 */
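    /* Roughly, the code emitted below does the following (illustrative sketch):
           if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
               goto ReturnWithFlags;
           eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
           pVCpu->cpum.GstCtx.eflags = eflags; */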
7525 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
7526 <= UINT32_MAX);
7527#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7528 AssertMsg( pReNative->idxCurCall == 0
7529 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
7530 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
7531#endif
7532
7533 /*
7534 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
7535 * any pending register writes must be flushed.
7536 */
7537 off = iemNativeRegFlushPendingWrites(pReNative, off);
7538
7539 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
7540 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
7541 true /*fSkipLivenessAssert*/);
7542 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
7543 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
7544 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
7545 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
7546 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
7547
7548 /* Free but don't flush the EFLAGS register. */
7549 iemNativeRegFreeTmp(pReNative, idxEflReg);
7550
7551 return off;
7552}
7553
7554
 7555/** Handles a_rcNormal: a no-op for VINF_SUCCESS, otherwise emits a ReturnBreak TB exit. */
7556template<int const a_rcNormal>
7557DECL_FORCE_INLINE(uint32_t)
7558iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
7559{
7560 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
7561 if (a_rcNormal != VINF_SUCCESS)
7562 {
7563#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7564 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7565#else
7566 RT_NOREF_PV(idxInstr);
7567#endif
7568
7569 /* As this code returns from the TB any pending register writes must be flushed. */
7570 off = iemNativeRegFlushPendingWrites(pReNative, off);
7571
7572 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
7573 }
7574 return off;
7575}
7576
7577
7578#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
7579 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7580 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7581
7582#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7583 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7584 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7585 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7586
7587/** Same as iemRegAddToRip64AndFinishingNoFlags. */
7588DECL_INLINE_THROW(uint32_t)
7589iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7590{
7591#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7592# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7593 if (!pReNative->Core.offPc)
7594 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7595# endif
7596
7597 /* Allocate a temporary PC register. */
7598 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7599
7600 /* Perform the addition and store the result. */
7601 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
7602 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7603
7604 /* Free but don't flush the PC register. */
7605 iemNativeRegFreeTmp(pReNative, idxPcReg);
7606#endif
7607
7608#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7609 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7610
7611 pReNative->Core.offPc += cbInstr;
7612# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7613 off = iemNativePcAdjustCheck(pReNative, off);
7614# endif
7615 if (pReNative->cCondDepth)
7616 off = iemNativeEmitPcWriteback(pReNative, off);
7617 else
7618 pReNative->Core.cInstrPcUpdateSkipped++;
7619#endif
7620
7621 return off;
7622}
7623
7624
7625#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
7626 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7627 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7628
7629#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7630 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7631 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7632 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7633
7634/** Same as iemRegAddToEip32AndFinishingNoFlags. */
7635DECL_INLINE_THROW(uint32_t)
7636iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7637{
7638#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7639# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7640 if (!pReNative->Core.offPc)
7641 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7642# endif
7643
7644 /* Allocate a temporary PC register. */
7645 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7646
7647 /* Perform the addition and store the result. */
7648 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7649 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7650
7651 /* Free but don't flush the PC register. */
7652 iemNativeRegFreeTmp(pReNative, idxPcReg);
7653#endif
7654
7655#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7656 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7657
7658 pReNative->Core.offPc += cbInstr;
7659# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7660 off = iemNativePcAdjustCheck(pReNative, off);
7661# endif
7662 if (pReNative->cCondDepth)
7663 off = iemNativeEmitPcWriteback(pReNative, off);
7664 else
7665 pReNative->Core.cInstrPcUpdateSkipped++;
7666#endif
7667
7668 return off;
7669}
7670
7671
7672#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
7673 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7674 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7675
7676#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
7677 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
7678 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7679 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7680
7681/** Same as iemRegAddToIp16AndFinishingNoFlags. */
7682DECL_INLINE_THROW(uint32_t)
7683iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
7684{
7685#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
7686# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7687 if (!pReNative->Core.offPc)
7688 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7689# endif
7690
7691 /* Allocate a temporary PC register. */
7692 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7693
7694 /* Perform the addition and store the result. */
7695 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
7696 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7697 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7698
7699 /* Free but don't flush the PC register. */
7700 iemNativeRegFreeTmp(pReNative, idxPcReg);
7701#endif
7702
7703#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7704 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7705
7706 pReNative->Core.offPc += cbInstr;
7707# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
7708 off = iemNativePcAdjustCheck(pReNative, off);
7709# endif
7710 if (pReNative->cCondDepth)
7711 off = iemNativeEmitPcWriteback(pReNative, off);
7712 else
7713 pReNative->Core.cInstrPcUpdateSkipped++;
7714#endif
7715
7716 return off;
7717}
7718
7719
7720
7721/*********************************************************************************************************************************
7722* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
7723*********************************************************************************************************************************/
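
/* Note: Unlike the straight RIP advancing above, these may raise #GP(0), so any pending
 *       guest register writes are flushed up front.  The 64-bit variant then either checks
 *       the new RIP for canonicality or truncates it to 16 bits, while the 32-bit and
 *       pre-386 variants clamp as needed and check against the CS segment limit. */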
7724
7725#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7726 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7727 (a_enmEffOpSize), pCallEntry->idxInstr); \
7728 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7729
7730#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7731 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7732 (a_enmEffOpSize), pCallEntry->idxInstr); \
7733 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7734 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7735
7736#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
7737 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7738 IEMMODE_16BIT, pCallEntry->idxInstr); \
7739 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7740
7741#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7742 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7743 IEMMODE_16BIT, pCallEntry->idxInstr); \
7744 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7745 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7746
7747#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
7748 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7749 IEMMODE_64BIT, pCallEntry->idxInstr); \
7750 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7751
7752#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7753 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7754 IEMMODE_64BIT, pCallEntry->idxInstr); \
7755 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7756 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7757
7758/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
7759 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
7760 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
7761DECL_INLINE_THROW(uint32_t)
7762iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7763 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7764{
7765 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
7766
7767 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7768 off = iemNativeRegFlushPendingWrites(pReNative, off);
7769
7770#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7771 Assert(pReNative->Core.offPc == 0);
7772
7773 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7774#endif
7775
7776 /* Allocate a temporary PC register. */
7777 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7778
7779 /* Perform the addition. */
7780 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
7781
7782 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
7783 {
7784 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
7785 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7786 }
7787 else
7788 {
7789 /* Just truncate the result to 16-bit IP. */
7790 Assert(enmEffOpSize == IEMMODE_16BIT);
7791 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7792 }
7793 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7794
7795 /* Free but don't flush the PC register. */
7796 iemNativeRegFreeTmp(pReNative, idxPcReg);
7797
7798 return off;
7799}
7800
7801
7802#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7803 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7804 (a_enmEffOpSize), pCallEntry->idxInstr); \
7805 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7806
7807#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
7808 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
7809 (a_enmEffOpSize), pCallEntry->idxInstr); \
7810 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7811 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7812
7813#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
7814 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7815 IEMMODE_16BIT, pCallEntry->idxInstr); \
7816 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7817
7818#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7819 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
7820 IEMMODE_16BIT, pCallEntry->idxInstr); \
7821 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7822 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7823
7824#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
7825 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7826 IEMMODE_32BIT, pCallEntry->idxInstr); \
7827 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7828
7829#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7830 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
7831 IEMMODE_32BIT, pCallEntry->idxInstr); \
7832 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7833 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7834
7835/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
7836 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
7837 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
7838DECL_INLINE_THROW(uint32_t)
7839iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
7840 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
7841{
7842 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
7843
7844 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7845 off = iemNativeRegFlushPendingWrites(pReNative, off);
7846
7847#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7848 Assert(pReNative->Core.offPc == 0);
7849
7850 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7851#endif
7852
7853 /* Allocate a temporary PC register. */
7854 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7855
7856 /* Perform the addition. */
7857 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7858
7859 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
7860 if (enmEffOpSize == IEMMODE_16BIT)
7861 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7862
 7863 /* Perform limit checking, potentially raising #GP(0) and exiting the TB. */
7864/** @todo we can skip this in 32-bit FLAT mode. */
7865 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7866
7867 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7868
7869 /* Free but don't flush the PC register. */
7870 iemNativeRegFreeTmp(pReNative, idxPcReg);
7871
7872 return off;
7873}
7874
7875
7876#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
7877 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7878 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7879
7880#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
7881 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
7882 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7883 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7884
7885#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
7886 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7887 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7888
7889#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
7890 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
7891 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7892 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7893
7894#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
7895 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7896 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7897
7898#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
7899 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
7900 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
7901 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
7902
7903/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
7904DECL_INLINE_THROW(uint32_t)
7905iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
7906 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
7907{
7908 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
7909 off = iemNativeRegFlushPendingWrites(pReNative, off);
7910
7911#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7912 Assert(pReNative->Core.offPc == 0);
7913
7914 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
7915#endif
7916
7917 /* Allocate a temporary PC register. */
7918 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
7919
7920 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
7921 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
7922 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
7923 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
7924 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7925
7926 /* Free but don't flush the PC register. */
7927 iemNativeRegFreeTmp(pReNative, idxPcReg);
7928
7929 return off;
7930}
7931
7932
7933
7934/*********************************************************************************************************************************
 7935* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
7936*********************************************************************************************************************************/
7937
7938/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
7939#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
7940 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7941
7942/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
7943#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
7944 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7945
7946/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
7947#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
7948 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
7949
7950/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
7951 * clears flags. */
7952#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
7953 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
7954 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7955
7956/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
7957 * clears flags. */
7958#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
7959 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
7960 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7961
7962/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
7963 * clears flags. */
7964#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
7965 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
7966 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7967
7968#undef IEM_MC_SET_RIP_U16_AND_FINISH
7969
7970
7971/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
7972#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
7973 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7974
7975/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
7976#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
7977 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
7978
7979/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
7980 * clears flags. */
7981#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
7982 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
7983 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7984
7985/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
7986 * and clears flags. */
7987#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
7988 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
7989 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
7990
7991#undef IEM_MC_SET_RIP_U32_AND_FINISH
7992
7993
7994/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
7995#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
7996 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
7997
7998/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
7999 * and clears flags. */
8000#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
8001 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
8002 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
8003
8004#undef IEM_MC_SET_RIP_U64_AND_FINISH
8005
8006
8007/** Same as iemRegRipJumpU16AndFinishNoFlags,
8008 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
8009DECL_INLINE_THROW(uint32_t)
8010iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
8011 uint8_t idxInstr, uint8_t cbVar)
8012{
8013 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
8014 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
8015
8016 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
8017 off = iemNativeRegFlushPendingWrites(pReNative, off);
8018
8019#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8020 Assert(pReNative->Core.offPc == 0);
8021
8022 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
8023#endif
8024
8025 /* Get a register with the new PC loaded from idxVarPc.
 8026 Note! This ASSUMES that the high bits of the GPR are zeroed. */
8027 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
8028
8029 /* Check limit (may #GP(0) + exit TB). */
8030 if (!f64Bit)
8031/** @todo we can skip this test in FLAT 32-bit mode. */
8032 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
8033 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
8034 else if (cbVar > sizeof(uint32_t))
8035 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
8036
8037 /* Store the result. */
8038 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
8039
8040 iemNativeVarRegisterRelease(pReNative, idxVarPc);
 8041 /** @todo implicitly free the variable? */
8042
8043 return off;
8044}
8045
8046
8047
8048/*********************************************************************************************************************************
8049* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
8050*********************************************************************************************************************************/
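
/* Note: These checks share a pattern: flush any pending guest register writes first (the
 *       raise path expects CPUMCTX to be current), optionally record the instruction number,
 *       then test the relevant CR0/CR4/XCR0/FSW bits and jump to the shared RaiseXx tail
 *       labels emitted by the iemNativeEmitRaiseXx functions further up. */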
8051
8052#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
8053 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
8054
8055/**
8056 * Emits code to check if a \#NM exception should be raised.
8057 *
8058 * @returns New code buffer offset, UINT32_MAX on failure.
8059 * @param pReNative The native recompile state.
8060 * @param off The code buffer offset.
8061 * @param idxInstr The current instruction.
8062 */
8063DECL_INLINE_THROW(uint32_t)
8064iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8065{
8066 /*
8067 * Make sure we don't have any outstanding guest register writes as we may
 8068 * raise an #NM and all guest registers must be up to date in CPUMCTX.
8069 *
8070 * @todo r=aeichner Can we postpone this to the RaiseNm path?
8071 */
8072 off = iemNativeRegFlushPendingWrites(pReNative, off);
8073
8074#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8075 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8076#else
8077 RT_NOREF(idxInstr);
8078#endif
8079
8080 /* Allocate a temporary CR0 register. */
8081 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8082 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8083
8084 /*
 8085 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
8086 * return raisexcpt();
8087 */
8088 /* Test and jump. */
8089 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
8090
8091 /* Free but don't flush the CR0 register. */
8092 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8093
8094 return off;
8095}
8096
8097
8098#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
 8099 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
8100
8101/**
8102 * Emits code to check if a \#MF exception should be raised.
8103 *
8104 * @returns New code buffer offset, UINT32_MAX on failure.
8105 * @param pReNative The native recompile state.
8106 * @param off The code buffer offset.
8107 * @param idxInstr The current instruction.
8108 */
8109DECL_INLINE_THROW(uint32_t)
8110iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8111{
8112 /*
8113 * Make sure we don't have any outstanding guest register writes as we may
 8114 * raise an #MF and all guest registers must be up to date in CPUMCTX.
8115 *
8116 * @todo r=aeichner Can we postpone this to the RaiseMf path?
8117 */
8118 off = iemNativeRegFlushPendingWrites(pReNative, off);
8119
8120#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8121 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8122#else
8123 RT_NOREF(idxInstr);
8124#endif
8125
8126 /* Allocate a temporary FSW register. */
8127 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
8128 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
8129
8130 /*
 8131 * if ((FSW & X86_FSW_ES) != 0)
8132 * return raisexcpt();
8133 */
8134 /* Test and jump. */
8135 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
8136
8137 /* Free but don't flush the FSW register. */
8138 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
8139
8140 return off;
8141}
8142
8143
8144#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
8145 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
8146
8147/**
 8148 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
8149 *
8150 * @returns New code buffer offset, UINT32_MAX on failure.
8151 * @param pReNative The native recompile state.
8152 * @param off The code buffer offset.
8153 * @param idxInstr The current instruction.
8154 */
8155DECL_INLINE_THROW(uint32_t)
8156iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8157{
8158 /*
8159 * Make sure we don't have any outstanding guest register writes as we may
 8160 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8161 *
8162 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8163 */
8164 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8165
8166#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8167 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8168#else
8169 RT_NOREF(idxInstr);
8170#endif
8171
8172 /* Allocate a temporary CR0 and CR4 register. */
8173 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8174 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8175 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8176 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8177
8178 /** @todo r=aeichner Optimize this more later to have less compares and branches,
8179 * (see IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() in IEMMc.h but check that it has some
8180 * actual performance benefit first). */
8181 /*
8182 * if (cr0 & X86_CR0_EM)
8183 * return raisexcpt();
8184 */
8185 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_EM_BIT, idxLabelRaiseUd);
8186 /*
8187 * if (!(cr4 & X86_CR4_OSFXSR))
8188 * return raisexcpt();
8189 */
8190 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSFXSR_BIT, idxLabelRaiseUd);
8191 /*
8192 * if (cr0 & X86_CR0_TS)
8193 * return raisexcpt();
8194 */
8195 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8196
8197 /* Free but don't flush the CR0 and CR4 register. */
8198 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8199 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8200
8201 return off;
8202}
8203
8204
8205#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
8206 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
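/* Roughly, the tests emitted by the function below implement this guest-level
   logic (illustrative sketch only):
       if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)) -> raise #UD
       else if (!(cr4 & X86_CR4_OSXSAVE))                                       -> raise #UD
       else if (cr0 & X86_CR0_TS)                                               -> raise #NM */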
8207
8208/**
8209 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
8210 *
8211 * @returns New code buffer offset, UINT32_MAX on failure.
8212 * @param pReNative The native recompile state.
8213 * @param off The code buffer offset.
8214 * @param idxInstr The current instruction.
8215 */
8216DECL_INLINE_THROW(uint32_t)
8217iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8218{
8219 /*
8220 * Make sure we don't have any outstanding guest register writes as we may
8221     * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
8222 *
8223 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8224 */
8225 off = iemNativeRegFlushPendingWrites(pReNative, off, false /*fFlushShadows*/);
8226
8227#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8228 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8229#else
8230 RT_NOREF(idxInstr);
8231#endif
8232
8233 /* Allocate a temporary CR0, CR4 and XCR0 register. */
8234 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
8235 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8236 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0, kIemNativeGstRegUse_ReadOnly);
8237 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
8238 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8239
8240 /** @todo r=aeichner Optimize this more later to have less compares and branches,
8241 * (see IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() in IEMMc.h but check that it has some
8242 * actual performance benefit first). */
8243 /*
8244 * if ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE))
8245 * return raisexcpt();
8246 */
8247 const uint8_t idxRegTmp = iemNativeRegAllocTmpImm(pReNative, &off, XSAVE_C_YMM | XSAVE_C_SSE);
8248 off = iemNativeEmitAndGprByGpr(pReNative, off, idxRegTmp, idxXcr0Reg);
8249 off = iemNativeEmitTestIfGprNotEqualImmAndJmpToLabel(pReNative, off, idxRegTmp, XSAVE_C_YMM | XSAVE_C_SSE, idxLabelRaiseUd);
8250 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8251
8252 /*
8253 * if (!(cr4 & X86_CR4_OSXSAVE))
8254 * return raisexcpt();
8255 */
8256 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT, idxLabelRaiseUd);
8257 /*
8258 * if (cr0 & X86_CR0_TS)
8259 * return raisexcpt();
8260 */
8261 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxCr0Reg, X86_CR0_TS_BIT, idxLabelRaiseNm);
8262
8263 /* Free but don't flush the CR0, CR4 and XCR0 register. */
8264 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
8265 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8266 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
8267
8268 return off;
8269}
8270
8271
8272#define IEM_MC_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
8273 off = iemNativeEmitRaiseSseAvxSimdFpXcpt(pReNative, off, pCallEntry->idxInstr)
8274
8275/**
8276 * Emits code to raise a SIMD floating point exception (either \#UD or \#XF).
8277 *
8278 * @returns New code buffer offset, UINT32_MAX on failure.
8279 * @param pReNative The native recompile state.
8280 * @param off The code buffer offset.
8281 * @param idxInstr The current instruction.
8282 */
8283DECL_INLINE_THROW(uint32_t)
8284iemNativeEmitRaiseSseAvxSimdFpXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
8285{
8286 /*
8287 * Make sure we don't have any outstanding guest register writes as we may
8288     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
8289 *
8290 * @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path?
8291 */
8292 off = iemNativeRegFlushPendingWrites(pReNative, off);
8293
8294#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8295 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8296#else
8297 RT_NOREF(idxInstr);
8298#endif
8299
8300 /* Allocate a temporary CR4 register. */
8301 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4, kIemNativeGstRegUse_ReadOnly);
8302 uint8_t const idxLabelRaiseXf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseXf);
8303 uint8_t const idxLabelRaiseUd = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseUd);
8304
8305 /*
8306 * if (!(cr4 & X86_CR4_OSXMMEEXCPT))
8307 * return raisexcpt();
8308 */
8309 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxCr4Reg, X86_CR4_OSXMMEEXCPT_BIT, idxLabelRaiseXf);
8310
8311 /* raise \#UD exception unconditionally. */
8312 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseUd);
8313
8314 /* Free but don't flush the CR4 register. */
8315 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
8316
8317 return off;
8318}
8319
8320
8321
8322/*********************************************************************************************************************************
8323* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
8324*********************************************************************************************************************************/
8325
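/* Illustrative sketch (hypothetical MC fragment) of how the emitters in this
   section are driven by the IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF macros
   defined below:
       IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
           ... recompiled if-block ...
       } IEM_MC_ELSE() {
           ... recompiled else-block ...
       } IEM_MC_ENDIF();
   The IF macro pushes a condition stack entry with 'else' and 'endif' labels and
   emits a branch to the 'else' label for the false case; IEM_MC_ELSE() emits a
   jump to the 'endif' label, defines the 'else' label and restores the state
   snapshot taken at the start of the if-block; IEM_MC_ENDIF() reconciles the
   register/variable state of the two branches and defines the 'endif' label. */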
8326/**
8327 * Pushes an IEM_MC_IF_XXX onto the condition stack.
8328 *
8329 * @returns Pointer to the condition stack entry on success, NULL on failure
8330 * (too many nestings)
8331 */
8332DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
8333{
8334#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8335 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
8336#endif
8337
8338 uint32_t const idxStack = pReNative->cCondDepth;
8339 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
8340
8341 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
8342 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
8343
8344 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
8345 pEntry->fInElse = false;
8346 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
8347 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
8348
8349 return pEntry;
8350}
8351
8352
8353/**
8354 * Start of the if-block, snapshotting the register and variable state.
8355 */
8356DECL_INLINE_THROW(void)
8357iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
8358{
8359 Assert(offIfBlock != UINT32_MAX);
8360 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8361 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8362 Assert(!pEntry->fInElse);
8363
8364    /* Define the start of the IF block if requested or for disassembly purposes. */
8365 if (idxLabelIf != UINT32_MAX)
8366 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
8367#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8368 else
8369 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
8370#else
8371 RT_NOREF(offIfBlock);
8372#endif
8373
8374#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8375 Assert(pReNative->Core.offPc == 0);
8376#endif
8377
8378 /* Copy the initial state so we can restore it in the 'else' block. */
8379 pEntry->InitialState = pReNative->Core;
8380}
8381
8382
8383#define IEM_MC_ELSE() } while (0); \
8384 off = iemNativeEmitElse(pReNative, off); \
8385 do {
8386
8387/** Emits code related to IEM_MC_ELSE. */
8388DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8389{
8390 /* Check sanity and get the conditional stack entry. */
8391 Assert(off != UINT32_MAX);
8392 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8393 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8394 Assert(!pEntry->fInElse);
8395
8396 /* Jump to the endif */
8397 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
8398
8399 /* Define the else label and enter the else part of the condition. */
8400 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8401 pEntry->fInElse = true;
8402
8403#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8404 Assert(pReNative->Core.offPc == 0);
8405#endif
8406
8407 /* Snapshot the core state so we can do a merge at the endif and restore
8408 the snapshot we took at the start of the if-block. */
8409 pEntry->IfFinalState = pReNative->Core;
8410 pReNative->Core = pEntry->InitialState;
8411
8412 return off;
8413}
8414
8415
8416#define IEM_MC_ENDIF() } while (0); \
8417 off = iemNativeEmitEndIf(pReNative, off)
8418
8419/** Emits code related to IEM_MC_ENDIF. */
8420DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8421{
8422 /* Check sanity and get the conditional stack entry. */
8423 Assert(off != UINT32_MAX);
8424 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
8425 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
8426
8427#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8428 Assert(pReNative->Core.offPc == 0);
8429#endif
8430
8431 /*
8432     * Now we have to find common ground with the core state at the end of the
8433     * if-block (or else-block).  Use the smallest common denominator and just drop anything
8434 * that isn't the same in both states.
8435 */
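    /* For example (illustrative): if the if-block left a guest register shadowed
       by one host register while the else-block left it shadowed by another host
       register (or not shadowed at all), that shadowing is dropped below so that
       later code reloads the guest value from CPUMCTX; only state that is
       identical on both paths survives the join. */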
8436 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
8437 * which is why we're doing this at the end of the else-block.
8438     *        But we'd need more info about the future for that to be worth the effort. */
8439 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
8440 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
8441 {
8442 /* shadow guest stuff first. */
8443 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
8444 if (fGstRegs)
8445 {
8446 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
8447 do
8448 {
8449 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
8450 fGstRegs &= ~RT_BIT_64(idxGstReg);
8451
8452 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
8453 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
8454 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
8455 {
8456 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
8457 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
8458 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
8459 }
8460 } while (fGstRegs);
8461 }
8462 else
8463 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
8464
8465 /* Check variables next. For now we must require them to be identical
8466 or stuff we can recreate. */
8467 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
8468 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
8469 if (fVars)
8470 {
8471 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
8472 do
8473 {
8474 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
8475 fVars &= ~RT_BIT_32(idxVar);
8476
8477 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
8478 {
8479 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
8480 continue;
8481 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
8482 {
8483 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8484 if (idxHstReg != UINT8_MAX)
8485 {
8486 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8487 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8488 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
8489 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8490 }
8491 continue;
8492 }
8493 }
8494 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
8495 continue;
8496
8497 /* Irreconcilable, so drop it. */
8498 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8499 if (idxHstReg != UINT8_MAX)
8500 {
8501 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8502 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8503 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
8504 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8505 }
8506 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
8507 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8508 } while (fVars);
8509 }
8510
8511        /* Finally, check that the host register allocations match. */
8512 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
8513 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
8514 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
8515 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
8516 }
8517
8518 /*
8519 * Define the endif label and maybe the else one if we're still in the 'if' part.
8520 */
8521 if (!pEntry->fInElse)
8522 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
8523 else
8524 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
8525 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
8526
8527    /* Pop the conditional stack. */
8528 pReNative->cCondDepth -= 1;
8529
8530 return off;
8531}
8532
8533
8534#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
8535 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
8536 do {
8537
8538/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
8539DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8540{
8541 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8542
8543 /* Get the eflags. */
8544 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8545 kIemNativeGstRegUse_ReadOnly);
8546
8547 /* Test and jump. */
8548 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8549
8550 /* Free but don't flush the EFlags register. */
8551 iemNativeRegFreeTmp(pReNative, idxEflReg);
8552
8553 /* Make a copy of the core state now as we start the if-block. */
8554 iemNativeCondStartIfBlock(pReNative, off);
8555
8556 return off;
8557}
8558
8559
8560#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
8561 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
8562 do {
8563
8564/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
8565DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
8566{
8567 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8568
8569 /* Get the eflags. */
8570 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8571 kIemNativeGstRegUse_ReadOnly);
8572
8573 /* Test and jump. */
8574 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
8575
8576 /* Free but don't flush the EFlags register. */
8577 iemNativeRegFreeTmp(pReNative, idxEflReg);
8578
8579 /* Make a copy of the core state now as we start the if-block. */
8580 iemNativeCondStartIfBlock(pReNative, off);
8581
8582 return off;
8583}
8584
8585
8586#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
8587 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
8588 do {
8589
8590/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
8591DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8592{
8593 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8594
8595 /* Get the eflags. */
8596 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8597 kIemNativeGstRegUse_ReadOnly);
8598
8599 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8600 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8601
8602 /* Test and jump. */
8603 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8604
8605 /* Free but don't flush the EFlags register. */
8606 iemNativeRegFreeTmp(pReNative, idxEflReg);
8607
8608 /* Make a copy of the core state now as we start the if-block. */
8609 iemNativeCondStartIfBlock(pReNative, off);
8610
8611 return off;
8612}
8613
8614
8615#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
8616 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
8617 do {
8618
8619/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
8620DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
8621{
8622 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8623
8624 /* Get the eflags. */
8625 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8626 kIemNativeGstRegUse_ReadOnly);
8627
8628 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8629 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8630
8631 /* Test and jump. */
8632 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8633
8634 /* Free but don't flush the EFlags register. */
8635 iemNativeRegFreeTmp(pReNative, idxEflReg);
8636
8637 /* Make a copy of the core state now as we start the if-block. */
8638 iemNativeCondStartIfBlock(pReNative, off);
8639
8640 return off;
8641}
8642
8643
8644#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
8645 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
8646 do {
8647
8648#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
8649 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
8650 do {
8651
8652/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
8653DECL_INLINE_THROW(uint32_t)
8654iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8655 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8656{
8657 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8658
8659 /* Get the eflags. */
8660 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8661 kIemNativeGstRegUse_ReadOnly);
8662
8663 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8664 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8665
8666 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8667 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8668 Assert(iBitNo1 != iBitNo2);
8669
8670#ifdef RT_ARCH_AMD64
8671 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
8672
8673 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8674 if (iBitNo1 > iBitNo2)
8675 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8676 else
8677 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8678 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8679
8680#elif defined(RT_ARCH_ARM64)
8681 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8682 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8683
8684 /* and tmpreg, eflreg, #1<<iBitNo1 */
8685 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8686
8687    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8688 if (iBitNo1 > iBitNo2)
8689 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8690 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8691 else
8692 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8693 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8694
8695 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8696
8697#else
8698# error "Port me"
8699#endif
8700
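    /* Worked example (illustrative): comparing EFLAGS.SF (bit 7) with EFLAGS.OF
       (bit 11) gives iBitNo1=7, iBitNo2=11, so the code above computes
           tmpreg = (efl & RT_BIT_32(7)) << 4;  tmpreg ^= efl;
       leaving bit 11 of tmpreg equal to SF ^ OF, i.e. set exactly when the two
       flag bits differ - which is what the test below branches on. */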
8701 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8702 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8703 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8704
8705 /* Free but don't flush the EFlags and tmp registers. */
8706 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8707 iemNativeRegFreeTmp(pReNative, idxEflReg);
8708
8709 /* Make a copy of the core state now as we start the if-block. */
8710 iemNativeCondStartIfBlock(pReNative, off);
8711
8712 return off;
8713}
8714
8715
8716#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
8717 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
8718 do {
8719
8720#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
8721 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
8722 do {
8723
8724/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
8725 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
8726DECL_INLINE_THROW(uint32_t)
8727iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
8728 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
8729{
8730 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8731
8732 /* We need an if-block label for the non-inverted variant. */
8733 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
8734 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
8735
8736 /* Get the eflags. */
8737 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8738 kIemNativeGstRegUse_ReadOnly);
8739
8740 /* Translate the flag masks to bit numbers. */
8741 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8742 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8743
8744 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
8745 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
8746 Assert(iBitNo1 != iBitNo);
8747
8748 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
8749 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
8750 Assert(iBitNo2 != iBitNo);
8751 Assert(iBitNo2 != iBitNo1);
8752
8753#ifdef RT_ARCH_AMD64
8754 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
8755#elif defined(RT_ARCH_ARM64)
8756 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8757#endif
8758
8759 /* Check for the lone bit first. */
8760 if (!fInverted)
8761 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
8762 else
8763 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
8764
8765 /* Then extract and compare the other two bits. */
8766#ifdef RT_ARCH_AMD64
8767 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8768 if (iBitNo1 > iBitNo2)
8769 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
8770 else
8771 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
8772 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
8773
8774#elif defined(RT_ARCH_ARM64)
8775 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8776
8777 /* and tmpreg, eflreg, #1<<iBitNo1 */
8778 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
8779
8780    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
8781 if (iBitNo1 > iBitNo2)
8782 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8783 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
8784 else
8785 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
8786 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
8787
8788 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8789
8790#else
8791# error "Port me"
8792#endif
8793
8794 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
8795 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
8796 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
8797
8798 /* Free but don't flush the EFlags and tmp registers. */
8799 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8800 iemNativeRegFreeTmp(pReNative, idxEflReg);
8801
8802 /* Make a copy of the core state now as we start the if-block. */
8803 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
8804
8805 return off;
8806}
8807
8808
8809#define IEM_MC_IF_CX_IS_NZ() \
8810 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
8811 do {
8812
8813/** Emits code for IEM_MC_IF_CX_IS_NZ. */
8814DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8815{
8816 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8817
8818 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8819 kIemNativeGstRegUse_ReadOnly);
8820 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
8821 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8822
8823 iemNativeCondStartIfBlock(pReNative, off);
8824 return off;
8825}
8826
8827
8828#define IEM_MC_IF_ECX_IS_NZ() \
8829 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
8830 do {
8831
8832#define IEM_MC_IF_RCX_IS_NZ() \
8833 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
8834 do {
8835
8836/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
8837DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8838{
8839 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8840
8841 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8842 kIemNativeGstRegUse_ReadOnly);
8843 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
8844 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8845
8846 iemNativeCondStartIfBlock(pReNative, off);
8847 return off;
8848}
8849
8850
8851#define IEM_MC_IF_CX_IS_NOT_ONE() \
8852 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
8853 do {
8854
8855/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
8856DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
8857{
8858 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8859
8860 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8861 kIemNativeGstRegUse_ReadOnly);
8862#ifdef RT_ARCH_AMD64
8863 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8864#else
8865 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8866 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8867 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8868#endif
8869 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8870
8871 iemNativeCondStartIfBlock(pReNative, off);
8872 return off;
8873}
8874
8875
8876#define IEM_MC_IF_ECX_IS_NOT_ONE() \
8877 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
8878 do {
8879
8880#define IEM_MC_IF_RCX_IS_NOT_ONE() \
8881 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
8882 do {
8883
8884/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
8885DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
8886{
8887 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8888
8889 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8890 kIemNativeGstRegUse_ReadOnly);
8891 if (f64Bit)
8892 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8893 else
8894 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8895 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8896
8897 iemNativeCondStartIfBlock(pReNative, off);
8898 return off;
8899}
8900
8901
8902#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8903 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
8904 do {
8905
8906#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8907 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
8908 do {
8909
8910/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
8911 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8912DECL_INLINE_THROW(uint32_t)
8913iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
8914{
8915 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8916
8917 /* We have to load both RCX and EFLAGS before we can start branching,
8918 otherwise we'll end up in the else-block with an inconsistent
8919 register allocator state.
8920 Doing EFLAGS first as it's more likely to be loaded, right? */
8921 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8922 kIemNativeGstRegUse_ReadOnly);
8923 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8924 kIemNativeGstRegUse_ReadOnly);
8925
8926 /** @todo we could reduce this to a single branch instruction by spending a
8927 * temporary register and some setnz stuff. Not sure if loops are
8928 * worth it. */
8929 /* Check CX. */
8930#ifdef RT_ARCH_AMD64
8931 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8932#else
8933 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
8934 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
8935 iemNativeRegFreeTmp(pReNative, idxTmpReg);
8936#endif
8937
8938 /* Check the EFlags bit. */
8939 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8940 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8941 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
8942 !fCheckIfSet /*fJmpIfSet*/);
8943
8944 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
8945 iemNativeRegFreeTmp(pReNative, idxEflReg);
8946
8947 iemNativeCondStartIfBlock(pReNative, off);
8948 return off;
8949}
8950
8951
8952#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8953 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
8954 do {
8955
8956#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8957 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
8958 do {
8959
8960#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
8961 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
8962 do {
8963
8964#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
8965 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
8966 do {
8967
8968/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
8969 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
8970 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
8971 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
8972DECL_INLINE_THROW(uint32_t)
8973iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
8974 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
8975{
8976 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
8977
8978 /* We have to load both RCX and EFLAGS before we can start branching,
8979 otherwise we'll end up in the else-block with an inconsistent
8980 register allocator state.
8981 Doing EFLAGS first as it's more likely to be loaded, right? */
8982 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
8983 kIemNativeGstRegUse_ReadOnly);
8984 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
8985 kIemNativeGstRegUse_ReadOnly);
8986
8987 /** @todo we could reduce this to a single branch instruction by spending a
8988 * temporary register and some setnz stuff. Not sure if loops are
8989 * worth it. */
8990 /* Check RCX/ECX. */
8991 if (f64Bit)
8992 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8993 else
8994 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
8995
8996 /* Check the EFlags bit. */
8997 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
8998 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
8999 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
9000 !fCheckIfSet /*fJmpIfSet*/);
9001
9002 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
9003 iemNativeRegFreeTmp(pReNative, idxEflReg);
9004
9005 iemNativeCondStartIfBlock(pReNative, off);
9006 return off;
9007}
9008
9009
9010
9011/*********************************************************************************************************************************
9012* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
9013*********************************************************************************************************************************/
9014/** Number of hidden arguments for CIMPL calls.
9015 * @note We're suffering from the usual VBOXSTRICTRC fun on Windows. */
9016#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
9017# define IEM_CIMPL_HIDDEN_ARGS 3
9018#else
9019# define IEM_CIMPL_HIDDEN_ARGS 2
9020#endif
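/* The two always-present hidden arguments are pVCpu and cbInstr; the third one on
   Windows/AMD64 with strict status codes is presumably the hidden return buffer
   pointer the MS x64 ABI uses for the VBOXSTRICTRC class return type (an educated
   guess based on the @note above, not something stated explicitly here). */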
9021
9022#define IEM_MC_NOREF(a_Name) \
9023 RT_NOREF_PV(a_Name)
9024
9025#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
9026 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
9027
9028#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
9029 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
9030
9031#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
9032 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
9033
9034#define IEM_MC_LOCAL(a_Type, a_Name) \
9035 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
9036
9037#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
9038 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
9039
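/* Illustrative usage sketch (hypothetical MC fragment, names invented here):
       IEM_MC_ARG(uint16_t,        u16Src, 1);
       IEM_MC_LOCAL(uint16_t,      u16Tmp);
       IEM_MC_LOCAL_CONST(uint8_t, bImm, 0x12);
   Under the native recompiler these do not declare real C locals; each name
   becomes a uint8_t variable index handed out by iemNativeArgAlloc(),
   iemNativeVarAlloc() and iemNativeVarAllocConst() respectively. */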
9040
9041/**
9042 * Gets the number of hidden arguments for an expected IEM_MC_CALL statement.
9043 */
9044DECLINLINE(uint8_t) iemNativeArgGetHiddenArgCount(PIEMRECOMPILERSTATE pReNative)
9045{
9046 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_CIMPL)
9047 return IEM_CIMPL_HIDDEN_ARGS;
9048 if (pReNative->fCImpl & IEM_CIMPL_F_CALLS_AIMPL_WITH_FXSTATE)
9049 return 1;
9050 return 0;
9051}
9052
9053
9054/**
9055 * Internal work that allocates a variable with kind set to
9056 * kIemNativeVarKind_Invalid and no current stack allocation.
9057 *
9058 * The kind will either be set by the caller or later when the variable is first
9059 * assigned a value.
9060 *
9061 * @returns Unpacked index.
9062 * @internal
9063 */
9064static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9065{
9066 Assert(cbType > 0 && cbType <= 64);
9067 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
9068 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
9069 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
9070 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9071 pReNative->Core.aVars[idxVar].cbVar = cbType;
9072 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9073 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
9074 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
9075 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
9076 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
9077 pReNative->Core.aVars[idxVar].fRegAcquired = false;
9078 pReNative->Core.aVars[idxVar].u.uValue = 0;
9079 return idxVar;
9080}
9081
9082
9083/**
9084 * Internal work that allocates an argument variable w/o setting enmKind.
9085 *
9086 * @returns Unpacked index.
9087 * @internal
9088 */
9089static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9090{
9091 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
9092 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9093 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
9094
9095 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
9096 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
9097 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
9098 return idxVar;
9099}
9100
9101
9102/**
9103 * Gets the stack slot for a stack variable, allocating one if necessary.
9104 *
9105 * Calling this function implies that the stack slot will contain a valid
9106 * variable value. The caller deals with any register currently assigned to the
9107 * variable, typically by spilling it into the stack slot.
9108 *
9109 * @returns The stack slot number.
9110 * @param pReNative The recompiler state.
9111 * @param idxVar The variable.
9112 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
9113 */
9114DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9115{
9116 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9117 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9118 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
9119
9120 /* Already got a slot? */
9121 uint8_t const idxStackSlot = pVar->idxStackSlot;
9122 if (idxStackSlot != UINT8_MAX)
9123 {
9124 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
9125 return idxStackSlot;
9126 }
9127
9128 /*
9129 * A single slot is easy to allocate.
9130 * Allocate them from the top end, closest to BP, to reduce the displacement.
9131 */
9132 if (pVar->cbVar <= sizeof(uint64_t))
9133 {
9134 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9135 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9136 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
9137 pVar->idxStackSlot = (uint8_t)iSlot;
9138        Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
9139 return (uint8_t)iSlot;
9140 }
9141
9142 /*
9143 * We need more than one stack slot.
9144 *
9145 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
9146 */
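    /* Worked example (illustrative): for a 32 byte variable, ASMBitLastSetU32(32)
       is 6, so fBitAlignMask = RT_BIT_32(6 - 4) - 1 = 3 and
       fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf; i.e. four consecutive
       8-byte slots are required and the first slot index must be a multiple of 4. */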
9147 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
9148 Assert(pVar->cbVar <= 64);
9149 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
9150 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
9151 uint32_t bmStack = ~pReNative->Core.bmStack;
9152 while (bmStack != UINT32_MAX)
9153 {
9154/** @todo allocate from the top to reduce BP displacement. */
9155 unsigned const iSlot = ASMBitFirstSetU32(bmStack) - 1;
9156 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9157 if (!(iSlot & fBitAlignMask))
9158 {
9159 if ((bmStack & (fBitAllocMask << iSlot)) == (fBitAllocMask << iSlot))
9160 {
9161 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
9162 pVar->idxStackSlot = (uint8_t)iSlot;
9163                Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9164 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
9165 return (uint8_t)iSlot;
9166 }
9167 }
9168 bmStack |= fBitAlignMask << (iSlot & ~fBitAlignMask);
9169 }
9170 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9171}
9172
9173
9174/**
9175 * Changes the variable to a stack variable.
9176 *
9177 * Currently this is only possible to do the first time the variable is used;
9178 * switching later can be implemented but isn't done.
9179 *
9180 * @param pReNative The recompiler state.
9181 * @param idxVar The variable.
9182 * @throws VERR_IEM_VAR_IPE_2
9183 */
9184static void iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9185{
9186 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9187 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9188 if (pVar->enmKind != kIemNativeVarKind_Stack)
9189 {
9190 /* We could in theory transition from immediate to stack as well, but it
9191 would involve the caller doing work storing the value on the stack. So,
9192 till that's required we only allow transition from invalid. */
9193 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9194 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9195 pVar->enmKind = kIemNativeVarKind_Stack;
9196
9197 /* Note! We don't allocate a stack slot here, that's only done when a
9198 slot is actually needed to hold a variable value. */
9199 }
9200}
9201
9202
9203/**
9204 * Sets the variable to a constant value.
9205 *
9206 * This does not require stack storage as we know the value and can always
9207 * reload it, unless of course it's referenced.
9208 *
9209 * @param pReNative The recompiler state.
9210 * @param idxVar The variable.
9211 * @param uValue The immediate value.
9212 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9213 */
9214static void iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
9215{
9216 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9217 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9218 if (pVar->enmKind != kIemNativeVarKind_Immediate)
9219 {
9220 /* Only simple transitions for now. */
9221 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9222 pVar->enmKind = kIemNativeVarKind_Immediate;
9223 }
9224 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9225
9226 pVar->u.uValue = uValue;
9227 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
9228 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
9229 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
9230}
9231
9232
9233/**
9234 * Sets the variable to a reference (pointer) to @a idxOtherVar.
9235 *
9236 * This does not require stack storage as we know the value and can always
9237 * reload it. Loading is postponed till needed.
9238 *
9239 * @param pReNative The recompiler state.
9240 * @param idxVar The variable. Unpacked.
9241 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
9242 *
9243 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
9244 * @internal
9245 */
9246static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
9247{
9248 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
9249 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
9250
9251 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
9252 {
9253 /* Only simple transitions for now. */
9254 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
9255 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9256 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
9257 }
9258 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9259
9260 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
9261
9262 /* Update the other variable, ensure it's a stack variable. */
9263 /** @todo handle variables with const values... that'll go boom now. */
9264 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
9265 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
9266}
9267
9268
9269/**
9270 * Sets the variable to a reference (pointer) to a guest register reference.
9271 *
9272 * This does not require stack storage as we know the value and can always
9273 * reload it. Loading is postponed till needed.
9274 *
9275 * @param pReNative The recompiler state.
9276 * @param idxVar The variable.
9277 * @param   enmRegClass     The class of guest registers to reference.
9278 * @param idxReg The register within @a enmRegClass to reference.
9279 *
9280 * @throws VERR_IEM_VAR_IPE_2
9281 */
9282static void iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9283 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
9284{
9285 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9286 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9287
9288 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
9289 {
9290 /* Only simple transitions for now. */
9291 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9292 pVar->enmKind = kIemNativeVarKind_GstRegRef;
9293 }
9294 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
9295
9296 pVar->u.GstRegRef.enmClass = enmRegClass;
9297 pVar->u.GstRegRef.idx = idxReg;
9298}
9299
9300
9301DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
9302{
9303 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9304}
9305
9306
9307DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
9308{
9309 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
9310
9311 /* Since we're using a generic uint64_t value type, we must truncate it if
9312       the variable is smaller, otherwise we may end up with a too large value when
9313       scaling up an imm8 w/ sign-extension.
9314
9315 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
9316       in the bios, bx=1) when running on arm, because clang expects 16-bit
9317 register parameters to have bits 16 and up set to zero. Instead of
9318 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
9319 CF value in the result. */
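    /* E.g. (illustrative): a 16-bit argument whose value arrives sign-extended as
       0xffffffffffffffff is reduced by the switch below to 0xffff, matching the
       zero-extended form expected for narrow register parameters (cf. the clang
       note above). */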
9320 switch (cbType)
9321 {
9322 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9323 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9324 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9325 }
9326 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9327 return idxVar;
9328}
9329
9330
9331DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
9332{
9333 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
9334 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
9335 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
9336 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
9337 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
9338 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
9339
9340 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
9341 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
9342 return idxArgVar;
9343}
9344
9345
9346DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
9347{
9348 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9349 /* Don't set to stack now, leave that to the first use as for instance
9350 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
9351 return idxVar;
9352}
9353
9354
9355DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
9356{
9357 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
9358
9359 /* Since we're using a generic uint64_t value type, we must truncate it if
9360       the variable is smaller, otherwise we may end up with a too large value when
9361       scaling up an imm8 w/ sign-extension. */
9362 switch (cbType)
9363 {
9364 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
9365 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
9366 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
9367 }
9368 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
9369 return idxVar;
9370}
9371
9372
9373/**
9374 * Makes sure variable @a idxVar has a register assigned to it and that it stays
9375 * fixed till we call iemNativeVarRegisterRelease.
9376 *
9377 * @returns The host register number.
9378 * @param pReNative The recompiler state.
9379 * @param idxVar The variable.
9380 * @param poff Pointer to the instruction buffer offset.
9381 * In case a register needs to be freed up or the value
9382 * loaded off the stack.
9383 * @param fInitialized Set if the variable must already have been initialized.
9384 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
9385 * the case.
9386 * @param idxRegPref Preferred register number or UINT8_MAX.
9387 */
9388DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
9389 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
9390{
9391 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9392 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9393 Assert(pVar->cbVar <= 8);
9394 Assert(!pVar->fRegAcquired);
9395
9396 uint8_t idxReg = pVar->idxReg;
9397 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9398 {
9399 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
9400 && pVar->enmKind < kIemNativeVarKind_End);
9401 pVar->fRegAcquired = true;
9402 return idxReg;
9403 }
9404
9405 /*
9406 * If the kind of variable has not yet been set, default to 'stack'.
9407 */
9408 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
9409 && pVar->enmKind < kIemNativeVarKind_End);
9410 if (pVar->enmKind == kIemNativeVarKind_Invalid)
9411 iemNativeVarSetKindToStack(pReNative, idxVar);
9412
9413 /*
9414     * We have to allocate a register for the variable, even if it's a stack one,
9415     * as we don't know if there are modifications being made to it before it's
9416 * finalized (todo: analyze and insert hints about that?).
9417 *
9418     * If we can, we try to get the correct register for argument variables. This
9419 * is assuming that most argument variables are fetched as close as possible
9420 * to the actual call, so that there aren't any interfering hidden calls
9421     * (memory accesses, etc.) in between.
9422 *
9423 * If we cannot or it's a variable, we make sure no argument registers
9424 * that will be used by this MC block will be allocated here, and we always
9425 * prefer non-volatile registers to avoid needing to spill stuff for internal
9426     * calls.
9427 */
9428 /** @todo Detect too early argument value fetches and warn about hidden
9429 * calls causing less optimal code to be generated in the python script. */
9430
9431 uint8_t const uArgNo = pVar->uArgNo;
9432 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
9433 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
9434 {
9435 idxReg = g_aidxIemNativeCallRegs[uArgNo];
9436 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9437 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
9438 }
9439 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
9440 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
9441 {
9442 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
9443 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
9444 & ~pReNative->Core.bmHstRegsWithGstShadow
9445 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
9446 & fNotArgsMask;
9447 if (fRegs)
9448 {
9449 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
9450 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
9451 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
9452 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
9453 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
9454 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9455 }
9456 else
9457 {
9458 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
9459 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
9460 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
9461 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
9462 }
9463 }
9464 else
9465 {
9466 idxReg = idxRegPref;
9467 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
9468 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
9469 }
9470 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9471 pVar->idxReg = idxReg;
9472
9473 /*
9474 * Load it off the stack if we've got a stack slot.
9475 */
9476 uint8_t const idxStackSlot = pVar->idxStackSlot;
9477 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9478 {
9479 Assert(fInitialized);
9480 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9481 switch (pVar->cbVar)
9482 {
9483 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
9484 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
9485 case 3: AssertFailed(); RT_FALL_THRU();
9486 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
9487 default: AssertFailed(); RT_FALL_THRU();
9488 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
9489 }
9490 }
9491 else
9492 {
9493 Assert(idxStackSlot == UINT8_MAX);
9494 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
9495 }
9496 pVar->fRegAcquired = true;
9497 return idxReg;
9498}
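/*
 * Illustrative sketch only (not part of the recompiler): the typical
 * acquire / emit / release pattern which the allocation strategy above is
 * designed for.  'idxVarValue' is a placeholder variable index.
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
 *      // ... emit whatever code needs the value in idxVarReg ...
 *      iemNativeVarRegisterRelease(pReNative, idxVarValue);
 */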
9499
9500
9501/**
9502 * The value of variable @a idxVar will be written in full to the @a enmGstReg
9503 * guest register.
9504 *
9505 * This function makes sure there is a register for it and sets it to be the
9506 * current shadow copy of @a enmGstReg.
9507 *
9508 * @returns The host register number.
9509 * @param pReNative The recompiler state.
9510 * @param idxVar The variable.
9511 * @param enmGstReg The guest register this variable will be written to
9512 * after this call.
9513 * @param poff Pointer to the instruction buffer offset.
9514 * In case a register needs to be freed up or if the
9515 * variable content needs to be loaded off the stack.
9516 *
9517 * @note We DO NOT expect @a idxVar to be an argument variable,
9518 * because this function is only used in the commit stage of
9519 * an instruction.
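 *
 * Rough usage sketch (illustrative only; idxValueVar and iGReg are
 * placeholders, not taken from a real emitter):
 * @code
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar,
 *                                                                       IEMNATIVEGSTREG_GPR(iGReg), &off);
 *      // ... emit the code computing the new value into idxVarReg ...
 *      iemNativeVarRegisterRelease(pReNative, idxValueVar);
 * @endcode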
9520 */
9521DECL_HIDDEN_THROW(uint8_t)
9522iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
9523{
9524 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9525 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9526 Assert(!pVar->fRegAcquired);
9527 AssertMsgStmt( pVar->cbVar <= 8
9528 && ( pVar->enmKind == kIemNativeVarKind_Immediate
9529 || pVar->enmKind == kIemNativeVarKind_Stack),
9530 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
9531 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
9532 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9533
9534 /*
9535 * This shouldn't ever be used for arguments, unless it's in a weird else
9536 * branch that doesn't do any calling and even then it's questionable.
9537 *
9538 * However, in case someone writes crazy wrong MC code and does register
9539 * updates before making calls, just use the regular register allocator to
9540 * ensure we get a register suitable for the intended argument number.
9541 */
9542 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
9543
9544 /*
9545 * If there is already a register for the variable, we transfer/set the
9546 * guest shadow copy assignment to it.
9547 */
9548 uint8_t idxReg = pVar->idxReg;
9549 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9550 {
9551 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
9552 {
9553 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
9554 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
9555 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
9556 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
9557 }
9558 else
9559 {
9560 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
9561 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
9562 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
9563 }
9564 /** @todo figure this one out. We need some way of making sure the register isn't
9565 * modified after this point, just in case we start writing crappy MC code. */
9566 pVar->enmGstReg = enmGstReg;
9567 pVar->fRegAcquired = true;
9568 return idxReg;
9569 }
9570 Assert(pVar->uArgNo == UINT8_MAX);
9571
9572 /*
9573 * Because this is supposed to be the commit stage, we just tag along with the
9574 * temporary register allocator and upgrade the allocation to a variable register.
9575 */
9576 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
9577 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
9578 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
9579 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
9580 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
9581 pVar->idxReg = idxReg;
9582
9583 /*
9584 * Now we need to load the register value.
9585 */
9586 if (pVar->enmKind == kIemNativeVarKind_Immediate)
9587 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
9588 else
9589 {
9590 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9591 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
9592 switch (pVar->cbVar)
9593 {
9594 case sizeof(uint64_t):
9595 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
9596 break;
9597 case sizeof(uint32_t):
9598 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
9599 break;
9600 case sizeof(uint16_t):
9601 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
9602 break;
9603 case sizeof(uint8_t):
9604 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
9605 break;
9606 default:
9607 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
9608 }
9609 }
9610
9611 pVar->fRegAcquired = true;
9612 return idxReg;
9613}
9614
9615
9616/**
9617 * Sets the host register for @a idxVarRc to @a idxReg.
9618 *
9619 * The register must not be allocated. Any guest register shadowing will be
9620 * implicitly dropped by this call.
9621 *
9622 * The variable must not have any register associated with it (causes
9623 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
9624 * implied.
9625 *
9626 * @returns idxReg
9627 * @param pReNative The recompiler state.
9628 * @param idxVar The variable.
9629 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
9630 * @param off For recording in debug info.
9631 *
9632 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
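 *
 * Illustrative sketch, mirroring how the AIMPL return value is captured
 * further down in this file:
 * @code
 *      off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
 *      iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
 * @endcode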
9633 */
9634DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
9635{
9636 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9637 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
9638 Assert(!pVar->fRegAcquired);
9639 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
9640 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
9641 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
9642
9643 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
9644 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
9645
9646 iemNativeVarSetKindToStack(pReNative, idxVar);
9647 pVar->idxReg = idxReg;
9648
9649 return idxReg;
9650}
9651
9652
9653/**
9654 * Convenience wrapper around iemNativeVarRegisterSet() that also marks the register as acquired.
9655 */
9656DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
9657 uint8_t idxReg, uint32_t *poff)
9658{
9659 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
9660 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
9661 return idxReg;
9662}
9663
9664
9665/**
9666 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
9667 *
9668 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
9669 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
9670 * requirement of flushing anything in volatile host registers when making a
9671 * call.
9672 *
9673 * @returns New @a off value.
9674 * @param pReNative The recompiler state.
9675 * @param off The code buffer position.
9676 * @param fHstRegsNotToSave Set of registers not to save & restore.
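 *
 * Rough usage sketch (illustrative only; pfnHelper and fKeepRegs are
 * placeholders):
 * @code
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fKeepRegs);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fKeepRegs);
 * @endcode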
9677 */
9678DECL_HIDDEN_THROW(uint32_t)
9679iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9680{
9681 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9682 if (fHstRegs)
9683 {
9684 do
9685 {
9686 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9687 fHstRegs &= ~RT_BIT_32(idxHstReg);
9688
9689 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9690 {
9691 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9692 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9693 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9694 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9695 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9696 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9697 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9698 {
9699 case kIemNativeVarKind_Stack:
9700 {
9701 /* Temporarily spill the variable register. */
9702 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9703 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9704 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9705 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9706 continue;
9707 }
9708
9709 case kIemNativeVarKind_Immediate:
9710 case kIemNativeVarKind_VarRef:
9711 case kIemNativeVarKind_GstRegRef:
9712 /* It is weird to have any of these loaded at this point. */
9713 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9714 continue;
9715
9716 case kIemNativeVarKind_End:
9717 case kIemNativeVarKind_Invalid:
9718 break;
9719 }
9720 AssertFailed();
9721 }
9722 else
9723 {
9724 /*
9725 * Allocate a temporary stack slot and spill the register to it.
9726 */
9727 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
9728 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
9729 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
9730 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
9731 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
9732 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
9733 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9734 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
9735 }
9736 } while (fHstRegs);
9737 }
9738 return off;
9739}
9740
9741
9742/**
9743 * Emit code to restore volatile registers after a call to a helper.
9744 *
9745 * @returns New @a off value.
9746 * @param pReNative The recompiler state.
9747 * @param off The code buffer position.
9748 * @param fHstRegsNotToSave Set of registers not to save & restore.
9749 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
9750 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
9751 */
9752DECL_HIDDEN_THROW(uint32_t)
9753iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
9754{
9755 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~fHstRegsNotToSave;
9756 if (fHstRegs)
9757 {
9758 do
9759 {
9760 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
9761 fHstRegs &= ~RT_BIT_32(idxHstReg);
9762
9763 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
9764 {
9765 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
9766 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9767 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
9768 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
9769 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
9770 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
9771 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
9772 {
9773 case kIemNativeVarKind_Stack:
9774 {
9775 /* Unspill the variable register. */
9776 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
9777 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
9778 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
9779 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9780 continue;
9781 }
9782
9783 case kIemNativeVarKind_Immediate:
9784 case kIemNativeVarKind_VarRef:
9785 case kIemNativeVarKind_GstRegRef:
9786 /* It is weird to have any of these loaded at this point. */
9787 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
9788 continue;
9789
9790 case kIemNativeVarKind_End:
9791 case kIemNativeVarKind_Invalid:
9792 break;
9793 }
9794 AssertFailed();
9795 }
9796 else
9797 {
9798 /*
9799 * Restore from temporary stack slot.
9800 */
9801 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
9802 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
9803 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
9804 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
9805
9806 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
9807 }
9808 } while (fHstRegs);
9809 }
9810 return off;
9811}
9812
9813
9814/**
9815 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
9816 *
9817 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
9818 *
9819 * ASSUMES that @a idxVar is valid and unpacked.
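 *
 * Worked example (illustrative): a 16 byte variable occupies
 * cSlots = (16 + 7) / 8 = 2 slots, so fAllocMask = RT_BIT_32(2) - 1 = 0x3,
 * which is then cleared from Core.bmStack at bit position idxStackSlot.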
9820 */
9821DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9822{
9823 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
9824 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
9825 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
9826 {
9827 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
9828 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
9829 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
9830 Assert(cSlots > 0);
9831 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
9832 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
9833 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
9834 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
9835 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
9836 }
9837 else
9838 Assert(idxStackSlot == UINT8_MAX);
9839}
9840
9841
9842/**
9843 * Worker that frees a single variable.
9844 *
9845 * ASSUMES that @a idxVar is valid and unpacked.
9846 */
9847DECLINLINE(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9848{
9849 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
9850 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
9851 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
9852
9853 /* Free the host register first if any assigned. */
9854 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9855 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9856 {
9857 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9858 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9859 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9860 }
9861
9862 /* Free argument mapping. */
9863 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
9864 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
9865 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
9866
9867 /* Free the stack slots. */
9868 iemNativeVarFreeStackSlots(pReNative, idxVar);
9869
9870 /* Free the actual variable. */
9871 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
9872 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
9873}
9874
9875
9876/**
9877 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
9878 */
9879DECLINLINE(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
9880{
9881 while (bmVars != 0)
9882 {
9883 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
9884 bmVars &= ~RT_BIT_32(idxVar);
9885
9886#if 1 /** @todo optimize by simplifying this later... */
9887 iemNativeVarFreeOneWorker(pReNative, idxVar);
9888#else
9889 /* Only need to free the host register, the rest is done as bulk updates below. */
9890 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
9891 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
9892 {
9893 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
9894 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
9895 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
9896 }
9897#endif
9898 }
9899#if 0 /** @todo optimize by simplifying this later... */
9900 pReNative->Core.bmVars = 0;
9901 pReNative->Core.bmStack = 0;
9902 pReNative->Core.u64ArgVars = UINT64_MAX;
9903#endif
9904}
9905
9906
9907/**
9908 * This is called by IEM_MC_END() to clean up all variables.
9909 */
9910DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
9911{
9912 uint32_t const bmVars = pReNative->Core.bmVars;
9913 if (bmVars != 0)
9914 iemNativeVarFreeAllSlow(pReNative, bmVars);
9915 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
9916 Assert(pReNative->Core.bmStack == 0);
9917}
9918
9919
9920#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
9921
9922/**
9923 * This is called by IEM_MC_FREE_LOCAL.
9924 */
9925DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9926{
9927 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9928 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
9929 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9930}
9931
9932
9933#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
9934
9935/**
9936 * This is called by IEM_MC_FREE_ARG.
9937 */
9938DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
9939{
9940 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
9941 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
9942 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
9943}
9944
9945
9946#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
9947
9948/**
9949 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
9950 */
9951DECL_INLINE_THROW(uint32_t)
9952iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
9953{
9954 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
9955 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
9956 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9957 Assert( pVarDst->cbVar == sizeof(uint16_t)
9958 || pVarDst->cbVar == sizeof(uint32_t));
9959
9960 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
9961 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
9962 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
9963 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
9964 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9965
9966 Assert(pVarDst->cbVar < pVarSrc->cbVar);
9967
9968 /*
9969 * Special case for immediates.
9970 */
9971 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
9972 {
9973 switch (pVarDst->cbVar)
9974 {
9975 case sizeof(uint16_t):
9976 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
9977 break;
9978 case sizeof(uint32_t):
9979 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
9980 break;
9981 default: AssertFailed(); break;
9982 }
9983 }
9984 else
9985 {
9986 /*
9987 * The generic solution for now.
9988 */
9989 /** @todo optimize this by having the python script make sure the source
9990 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
9991 * statement. Then we could just transfer the register assignments. */
9992 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
9993 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
9994 switch (pVarDst->cbVar)
9995 {
9996 case sizeof(uint16_t):
9997 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
9998 break;
9999 case sizeof(uint32_t):
10000 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
10001 break;
10002 default: AssertFailed(); break;
10003 }
10004 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
10005 iemNativeVarRegisterRelease(pReNative, idxVarDst);
10006 }
10007 return off;
10008}
10009
10010
10011
10012/*********************************************************************************************************************************
10013* Emitters for IEM_MC_CALL_CIMPL_XXX *
10014*********************************************************************************************************************************/
10015
10016/**
10017 * Emits code to load a reference to the given guest register into @a idxGprDst.
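 *
 * Illustrative sketch (placeholder arguments): loading the address of
 * guest RCX (GPR 1) into the first call argument register would look
 * something like this:
 * @code
 *      off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
 *                                           kIemNativeGstRegRef_Gpr, 1 /*rcx*/);
 * @endcode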
10018 */
10019DECL_INLINE_THROW(uint32_t)
10020iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
10021 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
10022{
10023#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
10024 /** @todo If we ever allow referencing the RIP register, we need to update the guest value here. */
10025#endif
10026
10027 /*
10028 * Get the offset relative to the CPUMCTX structure.
10029 */
10030 uint32_t offCpumCtx;
10031 switch (enmClass)
10032 {
10033 case kIemNativeGstRegRef_Gpr:
10034 Assert(idxRegInClass < 16);
10035 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
10036 break;
10037
10038 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
10039 Assert(idxRegInClass < 4);
10040 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
10041 break;
10042
10043 case kIemNativeGstRegRef_EFlags:
10044 Assert(idxRegInClass == 0);
10045 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
10046 break;
10047
10048 case kIemNativeGstRegRef_MxCsr:
10049 Assert(idxRegInClass == 0);
10050 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
10051 break;
10052
10053 case kIemNativeGstRegRef_FpuReg:
10054 Assert(idxRegInClass < 8);
10055 AssertFailed(); /** @todo what kind of indexing? */
10056 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
10057 break;
10058
10059 case kIemNativeGstRegRef_MReg:
10060 Assert(idxRegInClass < 8);
10061 AssertFailed(); /** @todo what kind of indexing? */
10062 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
10063 break;
10064
10065 case kIemNativeGstRegRef_XReg:
10066 Assert(idxRegInClass < 16);
10067 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
10068 break;
10069
10070 default:
10071 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
10072 }
10073
10074 /*
10075 * Load the value into the destination register.
10076 */
10077#ifdef RT_ARCH_AMD64
10078 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
10079
10080#elif defined(RT_ARCH_ARM64)
10081 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
10082 Assert(offCpumCtx < 4096);
10083 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
10084
10085#else
10086# error "Port me!"
10087#endif
10088
10089 return off;
10090}
10091
10092
10093/**
10094 * Common code for CIMPL and AIMPL calls.
10095 *
10096 * These are calls that use argument variables and such. They should not be
10097 * confused with internal calls required to implement an MC operation,
10098 * like a TLB load and similar.
10099 *
10100 * Upon return all that is left to do is to load any hidden arguments and
10101 * perform the call. All argument variables are freed.
10102 *
10103 * @returns New code buffer offset; throws VBox status code on error.
10104 * @param pReNative The native recompile state.
10105 * @param off The code buffer offset.
10106 * @param cArgs The total number of arguments (includes hidden
10107 * count).
10108 * @param cHiddenArgs The number of hidden arguments. The hidden
10109 * arguments must not have any variable declared for
10110 * them, whereas all the regular arguments must
10111 * (tstIEMCheckMc ensures this).
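 *
 * Rough usage sketch, mirroring what the AIMPL path further down does
 * (pfnAImpl is a placeholder):
 * @code
 *      off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
 *      // ... load any hidden arguments ...
 *      off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
 * @endcode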
10112 */
10113DECL_HIDDEN_THROW(uint32_t)
10114iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs)
10115{
10116#ifdef VBOX_STRICT
10117 /*
10118 * Assert sanity.
10119 */
10120 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
10121 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
10122 for (unsigned i = 0; i < cHiddenArgs; i++)
10123 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
10124 for (unsigned i = cHiddenArgs; i < cArgs; i++)
10125 {
10126 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
10127 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
10128 }
10129 iemNativeRegAssertSanity(pReNative);
10130#endif
10131
10132 /* We don't know what the called function makes use of, so flush any pending register writes. */
10133 off = iemNativeRegFlushPendingWrites(pReNative, off);
10134
10135 /*
10136 * Before we do anything else, go over variables that are referenced and
10137 * make sure they are not in a register.
10138 */
10139 uint32_t bmVars = pReNative->Core.bmVars;
10140 if (bmVars)
10141 {
10142 do
10143 {
10144 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
10145 bmVars &= ~RT_BIT_32(idxVar);
10146
10147 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
10148 {
10149 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
10150 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
10151 {
10152 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
10153 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
10154 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
10155 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
10156 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
10157
10158 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
10159 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
10160 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
10161 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
10162 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
10163 }
10164 }
10165 } while (bmVars != 0);
10166#if 0 //def VBOX_STRICT
10167 iemNativeRegAssertSanity(pReNative);
10168#endif
10169 }
10170
10171 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
10172
10173 /*
10174 * First, go over the host registers that will be used for arguments and make
10175 * sure they either hold the desired argument or are free.
10176 */
10177 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
10178 {
10179 for (uint32_t i = 0; i < cRegArgs; i++)
10180 {
10181 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10182 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10183 {
10184 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
10185 {
10186 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
10187 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
10188 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
10189 Assert(pVar->idxReg == idxArgReg);
10190 uint8_t const uArgNo = pVar->uArgNo;
10191 if (uArgNo == i)
10192 { /* perfect */ }
10193 /* The variable allocator logic should make sure this is impossible,
10194 except for when the return register is used as a parameter (ARM,
10195 but not x86). */
10196#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
10197 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
10198 {
10199# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10200# error "Implement this"
10201# endif
10202 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
10203 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
10204 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
10205 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10206 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
10207 }
10208#endif
10209 else
10210 {
10211 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
10212
10213 if (pVar->enmKind == kIemNativeVarKind_Stack)
10214 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
10215 else
10216 {
10217 /* just free it, can be reloaded if used again */
10218 pVar->idxReg = UINT8_MAX;
10219 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
10220 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
10221 }
10222 }
10223 }
10224 else
10225 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
10226 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
10227 }
10228 }
10229#if 0 //def VBOX_STRICT
10230 iemNativeRegAssertSanity(pReNative);
10231#endif
10232 }
10233
10234 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
10235
10236#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
10237 /*
10238 * If there are any stack arguments, make sure they are in their place as well.
10239 *
10240 * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
10241 * the caller) will be loading it later and it must be free (see the first loop).
10242 */
10243 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
10244 {
10245 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
10246 {
10247 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10248 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
10249 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10250 {
10251 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
10252 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
10253 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
10254 pVar->idxReg = UINT8_MAX;
10255 }
10256 else
10257 {
10258 /* Use ARG0 as temp for stuff we need registers for. */
10259 switch (pVar->enmKind)
10260 {
10261 case kIemNativeVarKind_Stack:
10262 {
10263 uint8_t const idxStackSlot = pVar->idxStackSlot;
10264 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10265 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
10266 iemNativeStackCalcBpDisp(idxStackSlot));
10267 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10268 continue;
10269 }
10270
10271 case kIemNativeVarKind_Immediate:
10272 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
10273 continue;
10274
10275 case kIemNativeVarKind_VarRef:
10276 {
10277 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10278 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10279 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10280 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10281 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10282 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10283 {
10284 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10285 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10286 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10287 }
10288 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10289 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10290 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
10291 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10292 continue;
10293 }
10294
10295 case kIemNativeVarKind_GstRegRef:
10296 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
10297 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10298 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
10299 continue;
10300
10301 case kIemNativeVarKind_Invalid:
10302 case kIemNativeVarKind_End:
10303 break;
10304 }
10305 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10306 }
10307 }
10308# if 0 //def VBOX_STRICT
10309 iemNativeRegAssertSanity(pReNative);
10310# endif
10311 }
10312#else
10313 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
10314#endif
10315
10316 /*
10317 * Make sure the argument variables are loaded into their respective registers.
10318 *
10319 * We can optimize this by ASSUMING that any register allocations are for
10320 * registers that have already been loaded and are ready. The previous step
10321 * saw to that.
10322 */
10323 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
10324 {
10325 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10326 {
10327 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
10328 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
10329 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
10330 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
10331 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
10332 else
10333 {
10334 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
10335 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
10336 {
10337 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
10338 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
10339 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
10340 | RT_BIT_32(idxArgReg);
10341 pVar->idxReg = idxArgReg;
10342 }
10343 else
10344 {
10345 /* Use ARG0 as temp for stuff we need registers for. */
10346 switch (pVar->enmKind)
10347 {
10348 case kIemNativeVarKind_Stack:
10349 {
10350 uint8_t const idxStackSlot = pVar->idxStackSlot;
10351 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
10352 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
10353 continue;
10354 }
10355
10356 case kIemNativeVarKind_Immediate:
10357 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
10358 continue;
10359
10360 case kIemNativeVarKind_VarRef:
10361 {
10362 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
10363 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
10364 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
10365 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
10366 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
10367 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
10368 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
10369 {
10370 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
10371 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
10372 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10373 }
10374 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
10375 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
10376 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
10377 continue;
10378 }
10379
10380 case kIemNativeVarKind_GstRegRef:
10381 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
10382 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
10383 continue;
10384
10385 case kIemNativeVarKind_Invalid:
10386 case kIemNativeVarKind_End:
10387 break;
10388 }
10389 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
10390 }
10391 }
10392 }
10393#if 0 //def VBOX_STRICT
10394 iemNativeRegAssertSanity(pReNative);
10395#endif
10396 }
10397#ifdef VBOX_STRICT
10398 else
10399 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
10400 {
10401 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
10402 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
10403 }
10404#endif
10405
10406 /*
10407 * Free all argument variables (simplified).
10408 * Their lifetime always expires with the call they are for.
10409 */
10410 /** @todo Make the python script check that arguments aren't used after
10411 * IEM_MC_CALL_XXXX. */
10412 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
10413 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
10414 * an argument value. There is also some FPU stuff. */
10415 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
10416 {
10417 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
10418 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
10419
10420 /* no need to free registers: */
10421 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
10422 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
10423 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
10424 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
10425 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
10426 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
10427
10428 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
10429 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
10430 iemNativeVarFreeStackSlots(pReNative, idxVar);
10431 }
10432 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
10433
10434 /*
10435 * Flush volatile registers as we make the call.
10436 */
10437 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
10438
10439 return off;
10440}
10441
10442
10443/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
10444DECL_HIDDEN_THROW(uint32_t)
10445iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
10446 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
10447
10448{
10449 /*
10450 * Do all the call setup and cleanup.
10451 */
10452 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
10453
10454 /*
10455 * Load the two or three hidden arguments.
10456 */
10457#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10458 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
10459 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10460 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
10461#else
10462 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10463 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
10464#endif
10465
10466 /*
10467 * Make the call and check the return code.
10468 *
10469 * Shadow PC copies are always flushed here, other stuff depends on flags.
10470 * Segment and general purpose registers are explicitly flushed via the
10471 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
10472 * macros.
10473 */
10474 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
10475#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
10476 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
10477#endif
10478 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
10479 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
10480 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
10481 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
10482
10483 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
10484}
10485
10486
10487#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
10488 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
10489
10490/** Emits code for IEM_MC_CALL_CIMPL_1. */
10491DECL_INLINE_THROW(uint32_t)
10492iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10493 uintptr_t pfnCImpl, uint8_t idxArg0)
10494{
10495 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10496 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
10497}
10498
10499
10500#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
10501 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
10502
10503/** Emits code for IEM_MC_CALL_CIMPL_2. */
10504DECL_INLINE_THROW(uint32_t)
10505iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10506 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
10507{
10508 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10509 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10510 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
10511}
10512
10513
10514#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
10515 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10516 (uintptr_t)a_pfnCImpl, a0, a1, a2)
10517
10518/** Emits code for IEM_MC_CALL_CIMPL_3. */
10519DECL_INLINE_THROW(uint32_t)
10520iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10521 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10522{
10523 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10524 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10525 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10526 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
10527}
10528
10529
10530#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
10531 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10532 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
10533
10534/** Emits code for IEM_MC_CALL_CIMPL_4. */
10535DECL_INLINE_THROW(uint32_t)
10536iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10537 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10538{
10539 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10540 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10541 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10542 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10543 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
10544}
10545
10546
10547#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
10548 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
10549 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
10550
10551/** Emits code for IEM_MC_CALL_CIMPL_5. */
10552DECL_INLINE_THROW(uint32_t)
10553iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
10554 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
10555{
10556 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
10557 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
10558 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
10559 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
10560 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
10561 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
10562}
10563
10564
10565/** Recompiler debugging: Flush guest register shadow copies. */
10566#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
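/*
 * Illustrative sketch only: a typical use from an MC block would look
 * something like this (the flush mask is just an example):
 *
 *      IEM_MC_HINT_FLUSH_GUEST_SHADOW(RT_BIT_64(kIemNativeGstReg_EFlags));
 */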
10567
10568
10569
10570/*********************************************************************************************************************************
10571* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
10572*********************************************************************************************************************************/
10573
10574/**
10575 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
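 *
 * Illustrative sketch of how the per-arity emitters below feed into this
 * worker (idxVarRc is UINT8_MAX when there is no return value variable):
 * @code
 *      return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2 /*cArgs*/);
 * @endcode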
10576 */
10577DECL_INLINE_THROW(uint32_t)
10578iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10579 uintptr_t pfnAImpl, uint8_t cArgs)
10580{
10581 if (idxVarRc != UINT8_MAX)
10582 {
10583 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
10584 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
10585 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
10586 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
10587 }
10588
10589 /*
10590 * Do all the call setup and cleanup.
10591 */
10592 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/);
10593
10594 /*
10595 * Make the call and update the return code variable if we've got one.
10596 */
10597 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10598 if (idxVarRc != UINT8_MAX)
10599 {
10600 off = iemNativeEmitBrk(pReNative, off, 0x4222); /** @todo test IEM_MC_CALL_AIMPL_3 and IEM_MC_CALL_AIMPL_4 return codes. */
10601 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
10602 }
10603
10604 return off;
10605}
10606
10607
10608
10609#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
10610 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
10611
10612#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
10613 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
10614
10615/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
10616DECL_INLINE_THROW(uint32_t)
10617iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
10618{
10619 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
10620}
10621
10622
10623#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
10624 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
10625
10626#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
10627 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
10628
10629/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
10630DECL_INLINE_THROW(uint32_t)
10631iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
10632{
10633 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10634 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
10635}
10636
10637
10638#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
10639 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
10640
10641#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
10642 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
10643
10644/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
10645DECL_INLINE_THROW(uint32_t)
10646iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10647 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10648{
10649 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10650 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10651 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
10652}
10653
10654
10655#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
10656 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
10657
10658#define IEM_MC_CALL_AIMPL_3(a_rc, a_pfn, a0, a1, a2) \
10659 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
10660
10661/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
10662DECL_INLINE_THROW(uint32_t)
10663iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10664 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10665{
10666 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10667 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10668 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10669 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
10670}
10671
10672
10673#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
10674 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10675
10676#define IEM_MC_CALL_AIMPL_4(a_rc, a_pfn, a0, a1, a2, a3) \
10677 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
10678
10679/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
10680DECL_INLINE_THROW(uint32_t)
10681iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
10682 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
10683{
10684 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
10685 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
10686 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
10687 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
10688 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
10689}
10690
10691
10692
10693/*********************************************************************************************************************************
10694* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
10695*********************************************************************************************************************************/
10696
10697#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
10698 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
10699
10700#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10701 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
10702
10703#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10704 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
10705
10706#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10707 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
10708
10709
10710/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
10711 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
10712DECL_INLINE_THROW(uint32_t)
10713iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
10714{
10715 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10716 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10717 Assert(iGRegEx < 20);
10718
10719 /* Same discussion as in iemNativeEmitFetchGregU16 */
10720 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10721 kIemNativeGstRegUse_ReadOnly);
10722
10723 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10724 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10725
10726 /* The value is zero-extended to the full 64-bit host register width. */
10727 if (iGRegEx < 16)
10728 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10729 else
10730 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10731
10732 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10733 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10734 return off;
10735}
10736
10737
10738#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
10739 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
10740
10741#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
10742 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
10743
10744#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
10745 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
10746
10747/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
10748DECL_INLINE_THROW(uint32_t)
10749iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
10750{
10751 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10752 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10753 Assert(iGRegEx < 20);
10754
10755 /* Same discussion as in iemNativeEmitFetchGregU16 */
10756 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10757 kIemNativeGstRegUse_ReadOnly);
10758
10759 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10760 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10761
10762 if (iGRegEx < 16)
10763 {
10764 switch (cbSignExtended)
10765 {
10766 case sizeof(uint16_t):
10767 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10768 break;
10769 case sizeof(uint32_t):
10770 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10771 break;
10772 case sizeof(uint64_t):
10773 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
10774 break;
10775 default: AssertFailed(); break;
10776 }
10777 }
10778 else
10779 {
10780 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
10781 switch (cbSignExtended)
10782 {
10783 case sizeof(uint16_t):
10784 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10785 break;
10786 case sizeof(uint32_t):
10787 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10788 break;
10789 case sizeof(uint64_t):
10790 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
10791 break;
10792 default: AssertFailed(); break;
10793 }
10794 }
10795
10796 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10797 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10798 return off;
10799}
10800
10801
10802
10803#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
10804 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
10805
10806#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
10807 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10808
10809#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
10810 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10811
10812/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
10813DECL_INLINE_THROW(uint32_t)
10814iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10815{
10816 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10817 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10818 Assert(iGReg < 16);
10819
10820 /*
10821 * We can either just load the low 16-bit of the GPR into a host register
10822 * for the variable, or we can do so via a shadow copy host register. The
10823 * latter will avoid having to reload it if it's being stored later, but
10824 * will waste a host register if it isn't touched again. Since we don't
10825      * know what's going to happen, we choose the latter for now.
10826 */
10827 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10828 kIemNativeGstRegUse_ReadOnly);
10829
10830 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10831 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10832 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10833 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10834
10835 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10836 return off;
10837}
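/* A rough sketch of what the above typically boils down to once the guest GPR
   is shadowed in a host register (register names purely illustrative):

       AMD64:  movzx   eax, r12w       ; variable = low 16 bits, zero-extended
       ARM64:  uxth    w0, w12         ; ditto (UBFX #0,#16 alias)

   so the variable always ends up zero-extended to the full host register. */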
10838
10839
10840#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
10841 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
10842
10843#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
10844 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
10845
10846/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
10847DECL_INLINE_THROW(uint32_t)
10848iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
10849{
10850 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10851 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
10852 Assert(iGReg < 16);
10853
10854 /*
10855 * We can either just load the low 16-bit of the GPR into a host register
10856 * for the variable, or we can do so via a shadow copy host register. The
10857 * latter will avoid having to reload it if it's being stored later, but
10858 * will waste a host register if it isn't touched again. Since we don't
10859      * know what's going to happen, we choose the latter for now.
10860 */
10861 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10862 kIemNativeGstRegUse_ReadOnly);
10863
10864 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10865 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10866 if (cbSignExtended == sizeof(uint32_t))
10867 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10868 else
10869 {
10870 Assert(cbSignExtended == sizeof(uint64_t));
10871 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
10872 }
10873 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10874
10875 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10876 return off;
10877}
10878
10879
10880#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
10881 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
10882
10883#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
10884 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
10885
10886/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
10887DECL_INLINE_THROW(uint32_t)
10888iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
10889{
10890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10891 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
10892 Assert(iGReg < 16);
10893
10894 /*
10895      * We can either just load the low 32-bit of the GPR into a host register
10896 * for the variable, or we can do so via a shadow copy host register. The
10897 * latter will avoid having to reload it if it's being stored later, but
10898 * will waste a host register if it isn't touched again. Since we don't
10899      * know what's going to happen, we choose the latter for now.
10900 */
10901 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10902 kIemNativeGstRegUse_ReadOnly);
10903
10904 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10905 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10906 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10907 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10908
10909 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10910 return off;
10911}
10912
10913
10914#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
10915 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
10916
10917/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
10918DECL_INLINE_THROW(uint32_t)
10919iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10920{
10921 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10922 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10923 Assert(iGReg < 16);
10924
10925 /*
10926 * We can either just load the low 32-bit of the GPR into a host register
10927 * for the variable, or we can do so via a shadow copy host register. The
10928 * latter will avoid having to reload it if it's being stored later, but
10929 * will waste a host register if it isn't touched again. Since we don't
10930      * know what's going to happen, we choose the latter for now.
10931 */
10932 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10933 kIemNativeGstRegUse_ReadOnly);
10934
10935 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10936 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10937 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
10938 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10939
10940 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10941 return off;
10942}
10943
10944
10945#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
10946 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10947
10948#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
10949 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
10950
10951/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
10952 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
10953DECL_INLINE_THROW(uint32_t)
10954iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
10955{
10956 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10957 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10958 Assert(iGReg < 16);
10959
10960 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
10961 kIemNativeGstRegUse_ReadOnly);
10962
10963 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10964 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10965 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
10966 /** @todo name the register a shadow one already? */
10967 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10968
10969 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
10970 return off;
10971}
10972
10973
10974
10975/*********************************************************************************************************************************
10976* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
10977*********************************************************************************************************************************/
10978
10979#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
10980 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
10981
10982/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
10983DECL_INLINE_THROW(uint32_t)
10984iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
10985{
10986 Assert(iGRegEx < 20);
10987 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
10988 kIemNativeGstRegUse_ForUpdate);
10989#ifdef RT_ARCH_AMD64
10990 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
10991
10992 /* To the lowest byte of the register: mov r8, imm8 */
10993 if (iGRegEx < 16)
10994 {
10995 if (idxGstTmpReg >= 8)
10996 pbCodeBuf[off++] = X86_OP_REX_B;
10997 else if (idxGstTmpReg >= 4)
10998 pbCodeBuf[off++] = X86_OP_REX;
10999 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
11000 pbCodeBuf[off++] = u8Value;
11001 }
11002 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
11003 else if (idxGstTmpReg < 4)
11004 {
11005 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
11006 pbCodeBuf[off++] = u8Value;
11007 }
11008 else
11009 {
11010 /* ror reg64, 8 */
11011 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11012 pbCodeBuf[off++] = 0xc1;
11013 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11014 pbCodeBuf[off++] = 8;
11015
11016 /* mov reg8, imm8 */
11017 if (idxGstTmpReg >= 8)
11018 pbCodeBuf[off++] = X86_OP_REX_B;
11019 else if (idxGstTmpReg >= 4)
11020 pbCodeBuf[off++] = X86_OP_REX;
11021 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
11022 pbCodeBuf[off++] = u8Value;
11023
11024 /* rol reg64, 8 */
11025 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11026 pbCodeBuf[off++] = 0xc1;
11027 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11028 pbCodeBuf[off++] = 8;
11029 }
11030
11031#elif defined(RT_ARCH_ARM64)
11032 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
11033 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11034 if (iGRegEx < 16)
11035 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
11036 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
11037 else
11038 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
11039 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
11040 iemNativeRegFreeTmp(pReNative, idxImmReg);
11041
11042#else
11043# error "Port me!"
11044#endif
11045
11046 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11047
11048 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11049
11050 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11051 return off;
11052}
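/* For example, when the store targets the AH/CH/DH/BH position but the host
   register shadowing the guest GPR cannot be byte-addressed that way (say it
   is r12), the rotate trick above emits roughly:

       ror  r12, 8          ; bring bits 15:8 down into bits 7:0
       mov  r12b, imm8      ; overwrite what used to be bits 15:8
       rol  r12, 8          ; restore the original layout

   leaving all other bits of the 64-bit shadow register untouched. */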
11053
11054
11055#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
11056 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
11057
11058/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
11059DECL_INLINE_THROW(uint32_t)
11060iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
11061{
11062 Assert(iGRegEx < 20);
11063 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11064
11065 /*
11066      * If it's a constant value (unlikely) we treat this as an
11067 * IEM_MC_STORE_GREG_U8_CONST statement.
11068 */
11069 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11070 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11071 { /* likely */ }
11072 else
11073 {
11074 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11075 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11076 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
11077 }
11078
11079 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
11080 kIemNativeGstRegUse_ForUpdate);
11081 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11082
11083#ifdef RT_ARCH_AMD64
11084 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
11085 if (iGRegEx < 16)
11086 {
11087 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
11088 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11089 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11090 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11091 pbCodeBuf[off++] = X86_OP_REX;
11092 pbCodeBuf[off++] = 0x8a;
11093 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11094 }
11095 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
11096 else if (idxGstTmpReg < 4 && idxVarReg < 4)
11097 {
11098 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
11099 pbCodeBuf[off++] = 0x8a;
11100 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
11101 }
11102 else
11103 {
11104 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
11105
11106 /* ror reg64, 8 */
11107 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11108 pbCodeBuf[off++] = 0xc1;
11109 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11110 pbCodeBuf[off++] = 8;
11111
11112 /* mov reg8, reg8(r/m) */
11113 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
11114 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
11115 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
11116 pbCodeBuf[off++] = X86_OP_REX;
11117 pbCodeBuf[off++] = 0x8a;
11118 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
11119
11120 /* rol reg64, 8 */
11121 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
11122 pbCodeBuf[off++] = 0xc1;
11123 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11124 pbCodeBuf[off++] = 8;
11125 }
11126
11127#elif defined(RT_ARCH_ARM64)
11128 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
11129 or
11130 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
11131 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11132 if (iGRegEx < 16)
11133 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
11134 else
11135 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
11136
11137#else
11138# error "Port me!"
11139#endif
11140 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11141
11142 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11143
11144 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
11145 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11146 return off;
11147}
11148
11149
11150
11151#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
11152 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
11153
11154/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
11155DECL_INLINE_THROW(uint32_t)
11156iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
11157{
11158 Assert(iGReg < 16);
11159 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11160 kIemNativeGstRegUse_ForUpdate);
11161#ifdef RT_ARCH_AMD64
11162 /* mov reg16, imm16 */
11163 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
11164 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11165 if (idxGstTmpReg >= 8)
11166 pbCodeBuf[off++] = X86_OP_REX_B;
11167 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
11168 pbCodeBuf[off++] = RT_BYTE1(uValue);
11169 pbCodeBuf[off++] = RT_BYTE2(uValue);
11170
11171#elif defined(RT_ARCH_ARM64)
11172 /* movk xdst, #uValue, lsl #0 */
11173 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11174 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
11175
11176#else
11177# error "Port me!"
11178#endif
11179
11180 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11181
11182 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11183 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11184 return off;
11185}
11186
11187
11188#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
11189 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
11190
11191/** Emits code for IEM_MC_STORE_GREG_U16. */
11192DECL_INLINE_THROW(uint32_t)
11193iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11194{
11195 Assert(iGReg < 16);
11196 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11197
11198 /*
11199      * If it's a constant value (unlikely) we treat this as an
11200 * IEM_MC_STORE_GREG_U16_CONST statement.
11201 */
11202 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11203 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11204 { /* likely */ }
11205 else
11206 {
11207 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11208 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11209 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
11210 }
11211
11212 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11213 kIemNativeGstRegUse_ForUpdate);
11214
11215#ifdef RT_ARCH_AMD64
11216 /* mov reg16, reg16 or [mem16] */
11217 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
11218 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11219 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
11220 {
11221 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
11222 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
11223 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
11224 pbCodeBuf[off++] = 0x8b;
11225 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
11226 }
11227 else
11228 {
11229 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
11230 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
11231 if (idxGstTmpReg >= 8)
11232 pbCodeBuf[off++] = X86_OP_REX_R;
11233 pbCodeBuf[off++] = 0x8b;
11234 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
11235 }
11236
11237#elif defined(RT_ARCH_ARM64)
11238 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
11239 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
11240 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11241 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
11242 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11243
11244#else
11245# error "Port me!"
11246#endif
11247
11248 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11249
11250 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11251 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11252 return off;
11253}
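/* Both paths above only touch bits 15:0 of the 64-bit shadow register (16-bit
   operand size on AMD64, BFI #0,#16 on ARM64), matching the x86 rule that a
   16-bit GPR write leaves bits 63:16 unchanged, e.g. "mov ax, 1" keeps the
   upper part of RAX intact. */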
11254
11255
11256#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
11257 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
11258
11259/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
11260DECL_INLINE_THROW(uint32_t)
11261iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
11262{
11263 Assert(iGReg < 16);
11264 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11265 kIemNativeGstRegUse_ForFullWrite);
11266 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11267 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11268 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11269 return off;
11270}
11271
11272
11273#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
11274 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
11275
11276/** Emits code for IEM_MC_STORE_GREG_U32. */
11277DECL_INLINE_THROW(uint32_t)
11278iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11279{
11280 Assert(iGReg < 16);
11281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11282
11283 /*
11284      * If it's a constant value (unlikely) we treat this as an
11285 * IEM_MC_STORE_GREG_U32_CONST statement.
11286 */
11287 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11288 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11289 { /* likely */ }
11290 else
11291 {
11292 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11293 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11294 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
11295 }
11296
11297 /*
11298      * For the rest we allocate a guest register for the variable and write
11299 * it to the CPUMCTX structure.
11300 */
11301 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11302 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11303#ifdef VBOX_STRICT
11304 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
11305#endif
11306 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11307 return off;
11308}
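/* Unlike the 8/16-bit stores, a 32-bit GPR write zero-extends to the full
   64-bit register on x86-64, so the variable register can simply be adopted
   as the guest shadow and written back whole; the strict-build check above
   merely verifies that its upper 32 bits are indeed already clear. */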
11309
11310
11311#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
11312 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
11313
11314/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
11315DECL_INLINE_THROW(uint32_t)
11316iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
11317{
11318 Assert(iGReg < 16);
11319 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11320 kIemNativeGstRegUse_ForFullWrite);
11321 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
11322 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11323 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11324 return off;
11325}
11326
11327
11328#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
11329 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
11330
11331/** Emits code for IEM_MC_STORE_GREG_U64. */
11332DECL_INLINE_THROW(uint32_t)
11333iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
11334{
11335 Assert(iGReg < 16);
11336 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
11337
11338 /*
11339      * If it's a constant value (unlikely) we treat this as an
11340 * IEM_MC_STORE_GREG_U64_CONST statement.
11341 */
11342 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
11343 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
11344 { /* likely */ }
11345 else
11346 {
11347 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
11348 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
11349 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
11350 }
11351
11352 /*
11353      * For the rest we allocate a guest register for the variable and write
11354 * it to the CPUMCTX structure.
11355 */
11356 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
11357 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11358 iemNativeVarRegisterRelease(pReNative, idxValueVar);
11359 return off;
11360}
11361
11362
11363#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
11364 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
11365
11366/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
11367DECL_INLINE_THROW(uint32_t)
11368iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
11369{
11370 Assert(iGReg < 16);
11371 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11372 kIemNativeGstRegUse_ForUpdate);
11373 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
11374 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11375 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11376 return off;
11377}
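/* The 32-bit register-to-itself move above (mov r32, r32 on AMD64, mov wN, wN
   on ARM64) is sufficient here because a 32-bit move implicitly zeroes
   bits 63:32 of the destination on both hosts, which is exactly what
   IEM_MC_CLEAR_HIGH_GREG_U64 asks for. */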
11378
11379
11380/*********************************************************************************************************************************
11381* General purpose register manipulation (add, sub). *
11382*********************************************************************************************************************************/
11383
11384#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11385 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11386
11387/** Emits code for IEM_MC_ADD_GREG_U16. */
11388DECL_INLINE_THROW(uint32_t)
11389iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
11390{
11391 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11392 kIemNativeGstRegUse_ForUpdate);
11393
11394#ifdef RT_ARCH_AMD64
11395 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11396 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11397 if (idxGstTmpReg >= 8)
11398 pbCodeBuf[off++] = X86_OP_REX_B;
11399 if (uAddend == 1)
11400 {
11401 pbCodeBuf[off++] = 0xff; /* inc */
11402 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11403 }
11404 else
11405 {
11406 pbCodeBuf[off++] = 0x81;
11407 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11408 pbCodeBuf[off++] = uAddend;
11409 pbCodeBuf[off++] = 0;
11410 }
11411
11412#else
11413 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11414 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11415
11416     /* add tmp, gstgrp, uAddend */
11417 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
11418
11419 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
11420 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11421
11422 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11423#endif
11424
11425 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11426
11427 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11428
11429 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11430 return off;
11431}
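/* The asymmetry above is deliberate: AMD64 has a real 16-bit add (via the
   0x66 operand-size prefix) that leaves bits 63:16 alone by itself, while
   AArch64 only has 32/64-bit adds, so there the sum is computed in a
   temporary and only bits 15:0 are inserted back into the shadow with BFI. */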
11432
11433
11434#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
11435 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11436
11437#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
11438 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11439
11440/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
11441DECL_INLINE_THROW(uint32_t)
11442iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
11443{
11444 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11445 kIemNativeGstRegUse_ForUpdate);
11446
11447#ifdef RT_ARCH_AMD64
11448 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11449 if (f64Bit)
11450 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11451 else if (idxGstTmpReg >= 8)
11452 pbCodeBuf[off++] = X86_OP_REX_B;
11453 if (uAddend == 1)
11454 {
11455 pbCodeBuf[off++] = 0xff; /* inc */
11456 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11457 }
11458 else if (uAddend < 128)
11459 {
11460 pbCodeBuf[off++] = 0x83; /* add */
11461 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11462 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11463 }
11464 else
11465 {
11466 pbCodeBuf[off++] = 0x81; /* add */
11467 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
11468 pbCodeBuf[off++] = RT_BYTE1(uAddend);
11469 pbCodeBuf[off++] = 0;
11470 pbCodeBuf[off++] = 0;
11471 pbCodeBuf[off++] = 0;
11472 }
11473
11474#else
11475     /* add gstgrp, gstgrp, uAddend */
11476 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11477 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
11478
11479#endif
11480
11481 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11482
11483 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11484
11485 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11486 return off;
11487}
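/* Encoding note for the AMD64 path above: the 0x83 form sign-extends its
   8-bit immediate, so it is only used for addends below 128; larger unsigned
   8-bit addends fall back to the 0x81 form with a full 32-bit immediate. */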
11488
11489
11490
11491#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
11492 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
11493
11494/** Emits code for IEM_MC_SUB_GREG_U16. */
11495DECL_INLINE_THROW(uint32_t)
11496iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
11497{
11498 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11499 kIemNativeGstRegUse_ForUpdate);
11500
11501#ifdef RT_ARCH_AMD64
11502 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
11503 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
11504 if (idxGstTmpReg >= 8)
11505 pbCodeBuf[off++] = X86_OP_REX_B;
11506 if (uSubtrahend == 1)
11507 {
11508 pbCodeBuf[off++] = 0xff; /* dec */
11509 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11510 }
11511 else
11512 {
11513 pbCodeBuf[off++] = 0x81;
11514 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11515 pbCodeBuf[off++] = uSubtrahend;
11516 pbCodeBuf[off++] = 0;
11517 }
11518
11519#else
11520 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
11521 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
11522
11523 /* sub tmp, gstgrp, uSubtrahend */
11524 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
11525
11526 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
11527 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
11528
11529 iemNativeRegFreeTmp(pReNative, idxTmpReg);
11530#endif
11531
11532 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11533
11534 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11535
11536 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11537 return off;
11538}
11539
11540
11541#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
11542 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
11543
11544#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
11545 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
11546
11547/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
11548DECL_INLINE_THROW(uint32_t)
11549iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
11550{
11551 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
11552 kIemNativeGstRegUse_ForUpdate);
11553
11554#ifdef RT_ARCH_AMD64
11555 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
11556 if (f64Bit)
11557 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
11558 else if (idxGstTmpReg >= 8)
11559 pbCodeBuf[off++] = X86_OP_REX_B;
11560 if (uSubtrahend == 1)
11561 {
11562 pbCodeBuf[off++] = 0xff; /* dec */
11563 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
11564 }
11565 else if (uSubtrahend < 128)
11566 {
11567 pbCodeBuf[off++] = 0x83; /* sub */
11568 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11569 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11570 }
11571 else
11572 {
11573 pbCodeBuf[off++] = 0x81; /* sub */
11574 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
11575 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
11576 pbCodeBuf[off++] = 0;
11577 pbCodeBuf[off++] = 0;
11578 pbCodeBuf[off++] = 0;
11579 }
11580
11581#else
11582     /* sub gstgrp, gstgrp, uSubtrahend */
11583 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
11584 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
11585
11586#endif
11587
11588 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
11589
11590 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
11591
11592 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
11593 return off;
11594}
11595
11596
11597/*********************************************************************************************************************************
11598* Local variable manipulation (add, sub, and, or). *
11599*********************************************************************************************************************************/
11600
11601#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
11602 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11603
11604#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
11605 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11606
11607#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
11608 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11609
11610#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
11611 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11612
11613/** Emits code for AND'ing a local and a constant value. */
11614DECL_INLINE_THROW(uint32_t)
11615iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11616{
11617#ifdef VBOX_STRICT
11618 switch (cbMask)
11619 {
11620 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11621 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11622 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11623 case sizeof(uint64_t): break;
11624 default: AssertFailedBreak();
11625 }
11626#endif
11627
11628 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11629 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11630
11631 if (cbMask <= sizeof(uint32_t))
11632 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
11633 else
11634 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
11635
11636 iemNativeVarRegisterRelease(pReNative, idxVar);
11637 return off;
11638}
11639
11640
11641#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
11642 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
11643
11644#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
11645 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
11646
11647#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
11648 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
11649
11650#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
11651 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
11652
11653/** Emits code for OR'ing a local and a constant value. */
11654DECL_INLINE_THROW(uint32_t)
11655iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
11656{
11657#ifdef VBOX_STRICT
11658 switch (cbMask)
11659 {
11660 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
11661 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
11662 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
11663 case sizeof(uint64_t): break;
11664 default: AssertFailedBreak();
11665 }
11666#endif
11667
11668 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11669 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
11670
11671 if (cbMask <= sizeof(uint32_t))
11672 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
11673 else
11674 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
11675
11676 iemNativeVarRegisterRelease(pReNative, idxVar);
11677 return off;
11678}
11679
11680
11681#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
11682 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
11683
11684#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
11685 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
11686
11687#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
11688 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
11689
11690/** Emits code for reversing the byte order in a local value. */
11691DECL_INLINE_THROW(uint32_t)
11692iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
11693{
11694 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
11695 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
11696
11697 switch (cbLocal)
11698 {
11699 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
11700 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
11701 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
11702 default: AssertFailedBreak();
11703 }
11704
11705 iemNativeVarRegisterRelease(pReNative, idxVar);
11706 return off;
11707}
11708
11709
11710
11711/*********************************************************************************************************************************
11712* EFLAGS *
11713*********************************************************************************************************************************/
11714
11715#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
11716# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
11717#else
11718# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
11719 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
11720
11721DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
11722{
11723 if (fEflOutput)
11724 {
11725 PVMCPUCC const pVCpu = pReNative->pVCpu;
11726# ifndef IEMLIVENESS_EXTENDED_LAYOUT
11727 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
11728 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
11729 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
11730# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11731 if (fEflOutput & (a_fEfl)) \
11732 { \
11733 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
11734 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11735 else \
11736 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11737 } else do { } while (0)
11738# else
11739 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
11740 IEMLIVENESSBIT const LivenessClobbered =
11741 {
11742 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11743 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11744 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11745 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11746 };
11747 IEMLIVENESSBIT const LivenessDelayable =
11748 {
11749 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
11750 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
11751 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
11752 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
11753 };
11754# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
11755 if (fEflOutput & (a_fEfl)) \
11756 { \
11757 if (LivenessClobbered.a_fLivenessMember) \
11758 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
11759 else if (LivenessDelayable.a_fLivenessMember) \
11760 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
11761 else \
11762 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
11763 } else do { } while (0)
11764# endif
11765 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
11766 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
11767 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
11768 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
11769 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
11770 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
11771 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
11772# undef CHECK_FLAG_AND_UPDATE_STATS
11773 }
11774 RT_NOREF(fEflInput);
11775}
11776#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
11777
11778#undef IEM_MC_FETCH_EFLAGS /* should not be used */
11779#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11780 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
11781
11782/** Handles IEM_MC_FETCH_EFLAGS_EX. */
11783DECL_INLINE_THROW(uint32_t)
11784iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
11785 uint32_t fEflInput, uint32_t fEflOutput)
11786{
11787 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
11788 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11789 RT_NOREF(fEflInput, fEflOutput);
11790
11791#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
11792# ifdef VBOX_STRICT
11793 if ( pReNative->idxCurCall != 0
11794 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
11795 {
11796 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
11797 uint32_t const fBoth = fEflInput | fEflOutput;
11798# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
11799 AssertMsg( !(fBoth & (a_fElfConst)) \
11800 || (!(fEflInput & (a_fElfConst)) \
11801 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11802 : !(fEflOutput & (a_fElfConst)) \
11803 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
11804 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
11805 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
11806 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
11807 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
11808 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
11809 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
11810 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
11811 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
11812 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
11813# undef ASSERT_ONE_EFL
11814 }
11815# endif
11816#endif
11817
11818     /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
11819 * the existing shadow copy. */
11820 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
11821 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11822 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
11823 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11824 return off;
11825}
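/* After the load above, the variable's host register is also marked as the
   shadow of guest EFLAGS (iemNativeRegClearAndMarkAsGstRegShadow), so later
   code that needs EFLAGS can typically pick it up from there instead of
   reloading it from CPUMCTX. */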
11826
11827
11828
11829/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
11830 * start using it with custom native code emission (inlining assembly
11831 * instruction helpers). */
11832#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
11833#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
11834 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11835 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput)
11836
11837/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
11838DECL_INLINE_THROW(uint32_t)
11839iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput)
11840{
11841 RT_NOREF(fEflOutput);
11842 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
11843 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
11844
11845#ifdef VBOX_STRICT
11846 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
11847 uint32_t offFixup = off;
11848 off = iemNativeEmitJnzToFixed(pReNative, off, off);
11849 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
11850 iemNativeFixupFixedJump(pReNative, offFixup, off);
11851
11852 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
11853 offFixup = off;
11854 off = iemNativeEmitJzToFixed(pReNative, off, off);
11855 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
11856 iemNativeFixupFixedJump(pReNative, offFixup, off);
11857
11858     /** @todo validate that only bits in the fEflOutput mask changed. */
11859#endif
11860
11861 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
11862 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
11863 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
11864 return off;
11865}
11866
11867
11868
11869/*********************************************************************************************************************************
11870* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
11871*********************************************************************************************************************************/
11872
11873#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
11874 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
11875
11876#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
11877 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
11878
11879#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
11880 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
11881
11882
11883/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
11884 * IEM_MC_FETCH_SREG_ZX_U64. */
11885DECL_INLINE_THROW(uint32_t)
11886iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
11887{
11888 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
11889 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
11890 Assert(iSReg < X86_SREG_COUNT);
11891
11892 /*
11893      * For now, we will not create a shadow copy of a selector.  The rationale
11894      * is that since we do not recompile the popping and loading of segment
11895      * registers, and since the IEM_MC_FETCH_SREG_U* MCs are only used for
11896      * pushing and moving to registers, there is only a small chance that the
11897      * shadow copy will be accessed again before the register is reloaded.  One
11898      * scenario would be nested calls in 16-bit code, but I doubt it's worth
11899      * the extra register pressure atm.
11900      *
11901      * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
11902      * and iemNativeVarRegisterAcquire for a load scenario. We only have the
11903      * store scenario covered at present (r160730).
11904 */
11905 iemNativeVarSetKindToStack(pReNative, idxDstVar);
11906 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
11907 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
11908 iemNativeVarRegisterRelease(pReNative, idxDstVar);
11909 return off;
11910}
11911
11912
11913
11914/*********************************************************************************************************************************
11915* Register references. *
11916*********************************************************************************************************************************/
11917
11918#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
11919 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
11920
11921#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
11922 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
11923
11924/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
11925DECL_INLINE_THROW(uint32_t)
11926iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
11927{
11928 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
11929 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11930 Assert(iGRegEx < 20);
11931
11932 if (iGRegEx < 16)
11933 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11934 else
11935 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
11936
11937 /* If we've delayed writing back the register value, flush it now. */
11938 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
11939
11940 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11941 if (!fConst)
11942 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
11943
11944 return off;
11945}
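/* The flushing above is what makes the reference safe to hand out: the
   consumer accesses the register through a pointer into CPUMCTX, so any
   delayed write must hit memory first, and for a writable reference the host
   shadow copy has to be dropped as well since it would become stale the
   moment the consumer modifies the structure directly. */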
11946
11947#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
11948 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
11949
11950#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
11951 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
11952
11953#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
11954 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
11955
11956#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
11957 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
11958
11959#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
11960 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
11961
11962#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
11963 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
11964
11965#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
11966 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
11967
11968#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
11969 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
11970
11971#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
11972 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
11973
11974#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
11975 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
11976
11977/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
11978DECL_INLINE_THROW(uint32_t)
11979iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
11980{
11981 Assert(iGReg < 16);
11982 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
11983 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
11984
11985 /* If we've delayed writing back the register value, flush it now. */
11986 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
11987
11988 /* If it's not a const reference we need to flush the shadow copy of the register now. */
11989 if (!fConst)
11990 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
11991
11992 return off;
11993}
11994
11995
11996#undef IEM_MC_REF_EFLAGS /* should not be used. */
11997#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
11998 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
11999 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags)
12000
12001/** Handles IEM_MC_REF_EFLAGS. */
12002DECL_INLINE_THROW(uint32_t)
12003iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
12004{
12005 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
12006 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12007
12008 /* If we've delayed writing back the register value, flush it now. */
12009 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
12010
12011 /* If there is a shadow copy of guest EFLAGS, flush it now. */
12012 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
12013
12014 return off;
12015}
12016
12017
12018/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
12019 * different code from the threaded recompiler, maybe it would be helpful. For now
12020 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
12021#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
12022
12023
12024#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
12025 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
12026
12027#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
12028 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
12029
12030#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
12031 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
12032
12033/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
12034DECL_INLINE_THROW(uint32_t)
12035iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
12036{
12037 Assert(iXReg < 16);
12038 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
12039 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12040
12041 /* If we've delayed writing back the register value, flush it now. */
12042 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
12043
12044#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
12045 /* If it's not a const reference we need to flush the shadow copy of the register now. */
12046 if (!fConst)
12047 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
12048#else
12049 RT_NOREF(fConst);
12050#endif
12051
12052 return off;
12053}
12054
12055
12056#define IEM_MC_REF_MXCSR(a_pfMxcsr) \
12057 off = iemNativeEmitRefMxcsr(pReNative, off, a_pfMxcsr)
12058
12059/** Handles IEM_MC_REF_MXCSR. */
12060DECL_INLINE_THROW(uint32_t)
12061iemNativeEmitRefMxcsr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef)
12062{
12063 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_MxCsr, 0);
12064 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
12065
12066 /* If we've delayed writing back the register value, flush it now. */
12067 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_MxCsr, 0);
12068
12069 /* If there is a shadow copy of guest MXCSR, flush it now. */
12070 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_MxCsr));
12071
12072 return off;
12073}
12074
12075
12076
12077/*********************************************************************************************************************************
12078* Effective Address Calculation *
12079*********************************************************************************************************************************/
12080#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
12081 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
12082
12083/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
12084 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
12085DECL_INLINE_THROW(uint32_t)
12086iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12087 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
12088{
12089 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12090
12091 /*
12092 * Handle the disp16 form with no registers first.
12093 *
12094 * Convert to an immediate value, as that'll delay the register allocation
12095 * and assignment till the memory access / call / whatever and we can use
12096 * a more appropriate register (or none at all).
12097 */
12098 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
12099 {
12100 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
12101 return off;
12102 }
12103
12104    /* Determine the displacement. */
12105 uint16_t u16EffAddr;
12106 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12107 {
12108 case 0: u16EffAddr = 0; break;
12109 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
12110 case 2: u16EffAddr = u16Disp; break;
12111 default: AssertFailedStmt(u16EffAddr = 0);
12112 }
12113
12114 /* Determine the registers involved. */
12115 uint8_t idxGstRegBase;
12116 uint8_t idxGstRegIndex;
12117 switch (bRm & X86_MODRM_RM_MASK)
12118 {
12119 case 0:
12120 idxGstRegBase = X86_GREG_xBX;
12121 idxGstRegIndex = X86_GREG_xSI;
12122 break;
12123 case 1:
12124 idxGstRegBase = X86_GREG_xBX;
12125 idxGstRegIndex = X86_GREG_xDI;
12126 break;
12127 case 2:
12128 idxGstRegBase = X86_GREG_xBP;
12129 idxGstRegIndex = X86_GREG_xSI;
12130 break;
12131 case 3:
12132 idxGstRegBase = X86_GREG_xBP;
12133 idxGstRegIndex = X86_GREG_xDI;
12134 break;
12135 case 4:
12136 idxGstRegBase = X86_GREG_xSI;
12137 idxGstRegIndex = UINT8_MAX;
12138 break;
12139 case 5:
12140 idxGstRegBase = X86_GREG_xDI;
12141 idxGstRegIndex = UINT8_MAX;
12142 break;
12143 case 6:
12144 idxGstRegBase = X86_GREG_xBP;
12145 idxGstRegIndex = UINT8_MAX;
12146 break;
12147#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
12148 default:
12149#endif
12150 case 7:
12151 idxGstRegBase = X86_GREG_xBX;
12152 idxGstRegIndex = UINT8_MAX;
12153 break;
12154 }
12155
12156 /*
12157 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
12158 */
12159 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12160 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12161 kIemNativeGstRegUse_ReadOnly);
12162 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
12163 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12164 kIemNativeGstRegUse_ReadOnly)
12165 : UINT8_MAX;
12166#ifdef RT_ARCH_AMD64
12167 if (idxRegIndex == UINT8_MAX)
12168 {
12169 if (u16EffAddr == 0)
12170 {
12171 /* movxz ret, base */
12172            /* movzx ret, base */
12173 }
12174 else
12175 {
12176 /* lea ret32, [base64 + disp32] */
12177 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12178 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12179 if (idxRegRet >= 8 || idxRegBase >= 8)
12180 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12181 pbCodeBuf[off++] = 0x8d;
12182 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12183 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
12184 else
12185 {
12186 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
12187 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12188 }
12189 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12190 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12191 pbCodeBuf[off++] = 0;
12192 pbCodeBuf[off++] = 0;
12193 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12194
12195 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12196 }
12197 }
12198 else
12199 {
12200 /* lea ret32, [index64 + base64 (+ disp32)] */
12201 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12202 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12203 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12204 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12205 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12206 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12207 pbCodeBuf[off++] = 0x8d;
12208 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
12209 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12210 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
12211 if (bMod == X86_MOD_MEM4)
12212 {
12213 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
12214 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
12215 pbCodeBuf[off++] = 0;
12216 pbCodeBuf[off++] = 0;
12217 }
12218 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12219 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
12220 }
12221
12222#elif defined(RT_ARCH_ARM64)
12223 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
12224 if (u16EffAddr == 0)
12225 {
12226 if (idxRegIndex == UINT8_MAX)
12227 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
12228 else
12229 {
12230 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
12231 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12232 }
12233 }
12234 else
12235 {
12236 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
12237 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
12238 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
12239 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12240 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
12241 else
12242 {
12243 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
12244 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12245 }
12246 if (idxRegIndex != UINT8_MAX)
12247 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
12248 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
12249 }
12250
12251#else
12252# error "port me"
12253#endif
12254
12255 if (idxRegIndex != UINT8_MAX)
12256 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12257 iemNativeRegFreeTmp(pReNative, idxRegBase);
12258 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12259 return off;
12260}
12261
12262
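#if 0 /* Standalone sketch (hypothetical helper, not compiled) of the 16-bit ModR/M effective
         address table the emitter above mirrors; register values are supplied by the caller. */
# include <stdint.h>
static uint16_t iemExampleCalcEffAddr16(uint8_t bRm, uint16_t u16Disp,
                                        uint16_t bx, uint16_t bp, uint16_t si, uint16_t di)
{
    if ((bRm & 0xc7) == 6)              /* mod=0, rm=6: register-less disp16 form */
        return u16Disp;

    uint16_t uDisp;
    switch ((bRm >> 6) & 3)             /* mod selects the displacement size */
    {
        default:
        case 0: uDisp = 0; break;
        case 1: uDisp = (uint16_t)(int16_t)(int8_t)u16Disp; break;     /* sign-extended disp8 */
        case 2: uDisp = u16Disp; break;                                /* disp16 */
    }

    uint16_t uBase, uIndex = 0;
    switch (bRm & 7)                    /* rm selects base (+ optional index) */
    {
        case 0: uBase = bx; uIndex = si; break;
        case 1: uBase = bx; uIndex = di; break;
        case 2: uBase = bp; uIndex = si; break;
        case 3: uBase = bp; uIndex = di; break;
        case 4: uBase = si; break;
        case 5: uBase = di; break;
        case 6: uBase = bp; break;
        default:
        case 7: uBase = bx; break;
    }
    return (uint16_t)(uDisp + uBase + uIndex);  /* wraps at 64KiB, like the UXTH/clear16up above */
}
#endif
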
12263#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
12264 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
12265
12266/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
12267 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
12268DECL_INLINE_THROW(uint32_t)
12269iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
12270 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
12271{
12272 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12273
12274 /*
12275 * Handle the disp32 form with no registers first.
12276 *
12277 * Convert to an immediate value, as that'll delay the register allocation
12278 * and assignment till the memory access / call / whatever and we can use
12279 * a more appropriate register (or none at all).
12280 */
12281 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12282 {
12283 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
12284 return off;
12285 }
12286
12287    /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
12288 uint32_t u32EffAddr = 0;
12289 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12290 {
12291 case 0: break;
12292 case 1: u32EffAddr = (int8_t)u32Disp; break;
12293 case 2: u32EffAddr = u32Disp; break;
12294 default: AssertFailed();
12295 }
12296
12297 /* Get the register (or SIB) value. */
12298 uint8_t idxGstRegBase = UINT8_MAX;
12299 uint8_t idxGstRegIndex = UINT8_MAX;
12300 uint8_t cShiftIndex = 0;
12301 switch (bRm & X86_MODRM_RM_MASK)
12302 {
12303 case 0: idxGstRegBase = X86_GREG_xAX; break;
12304 case 1: idxGstRegBase = X86_GREG_xCX; break;
12305 case 2: idxGstRegBase = X86_GREG_xDX; break;
12306 case 3: idxGstRegBase = X86_GREG_xBX; break;
12307 case 4: /* SIB */
12308 {
12309            /* index w/ scaling. */
12310 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12311 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12312 {
12313 case 0: idxGstRegIndex = X86_GREG_xAX; break;
12314 case 1: idxGstRegIndex = X86_GREG_xCX; break;
12315 case 2: idxGstRegIndex = X86_GREG_xDX; break;
12316 case 3: idxGstRegIndex = X86_GREG_xBX; break;
12317 case 4: cShiftIndex = 0; /*no index*/ break;
12318 case 5: idxGstRegIndex = X86_GREG_xBP; break;
12319 case 6: idxGstRegIndex = X86_GREG_xSI; break;
12320 case 7: idxGstRegIndex = X86_GREG_xDI; break;
12321 }
12322
12323 /* base */
12324 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
12325 {
12326 case 0: idxGstRegBase = X86_GREG_xAX; break;
12327 case 1: idxGstRegBase = X86_GREG_xCX; break;
12328 case 2: idxGstRegBase = X86_GREG_xDX; break;
12329 case 3: idxGstRegBase = X86_GREG_xBX; break;
12330 case 4:
12331 idxGstRegBase = X86_GREG_xSP;
12332 u32EffAddr += uSibAndRspOffset >> 8;
12333 break;
12334 case 5:
12335 if ((bRm & X86_MODRM_MOD_MASK) != 0)
12336 idxGstRegBase = X86_GREG_xBP;
12337 else
12338 {
12339 Assert(u32EffAddr == 0);
12340 u32EffAddr = u32Disp;
12341 }
12342 break;
12343 case 6: idxGstRegBase = X86_GREG_xSI; break;
12344 case 7: idxGstRegBase = X86_GREG_xDI; break;
12345 }
12346 break;
12347 }
12348 case 5: idxGstRegBase = X86_GREG_xBP; break;
12349 case 6: idxGstRegBase = X86_GREG_xSI; break;
12350 case 7: idxGstRegBase = X86_GREG_xDI; break;
12351 }
12352
12353 /*
12354 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12355 * the start of the function.
12356 */
12357 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12358 {
12359 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
12360 return off;
12361 }
12362
12363 /*
12364 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12365 */
12366 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12367 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12368 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12369 kIemNativeGstRegUse_ReadOnly);
12370 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12371 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12372 kIemNativeGstRegUse_ReadOnly);
12373
12374 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12375 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12376 {
12377 idxRegBase = idxRegIndex;
12378 idxRegIndex = UINT8_MAX;
12379 }
12380
12381#ifdef RT_ARCH_AMD64
12382 if (idxRegIndex == UINT8_MAX)
12383 {
12384 if (u32EffAddr == 0)
12385 {
12386 /* mov ret, base */
12387 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12388 }
12389 else
12390 {
12391 /* lea ret32, [base64 + disp32] */
12392 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12393 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12394 if (idxRegRet >= 8 || idxRegBase >= 8)
12395 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
12396 pbCodeBuf[off++] = 0x8d;
12397 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12398 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12399 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12400 else
12401 {
12402 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12403 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12404 }
12405 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12406 if (bMod == X86_MOD_MEM4)
12407 {
12408 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12409 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12410 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12411 }
12412 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12413 }
12414 }
12415 else
12416 {
12417 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12418 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12419 if (idxRegBase == UINT8_MAX)
12420 {
12421 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
12422 if (idxRegRet >= 8 || idxRegIndex >= 8)
12423 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12424 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12425 pbCodeBuf[off++] = 0x8d;
12426 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12427 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12428 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12429 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12430 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12431 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12432 }
12433 else
12434 {
12435 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12436 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12437 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12438 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12439 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
12440 pbCodeBuf[off++] = 0x8d;
12441 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12442 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12443 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12444 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12445 if (bMod != X86_MOD_MEM0)
12446 {
12447 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12448 if (bMod == X86_MOD_MEM4)
12449 {
12450 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12451 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12452 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12453 }
12454 }
12455 }
12456 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12457 }
12458
12459#elif defined(RT_ARCH_ARM64)
12460 if (u32EffAddr == 0)
12461 {
12462 if (idxRegIndex == UINT8_MAX)
12463 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12464 else if (idxRegBase == UINT8_MAX)
12465 {
12466 if (cShiftIndex == 0)
12467 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
12468 else
12469 {
12470 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12471 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
12472 }
12473 }
12474 else
12475 {
12476 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12477 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12478 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12479 }
12480 }
12481 else
12482 {
12483 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
12484 {
12485 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12486 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
12487 }
12488 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
12489 {
12490 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12491 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
12492 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
12493 }
12494 else
12495 {
12496 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
12497 if (idxRegBase != UINT8_MAX)
12498 {
12499 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12500 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
12501 }
12502 }
12503 if (idxRegIndex != UINT8_MAX)
12504 {
12505 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12506 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12507 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
12508 }
12509 }
12510
12511#else
12512# error "port me"
12513#endif
12514
12515 if (idxRegIndex != UINT8_MAX)
12516 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12517 if (idxRegBase != UINT8_MAX)
12518 iemNativeRegFreeTmp(pReNative, idxRegBase);
12519 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12520 return off;
12521}
12522
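#if 0 /* Rough, standalone sketch (hypothetical helper) of the hand-assembled AMD64
         "lea r32, [base + index*scale + disp32]" byte layout emitted above.  For brevity it
         always uses the mod=10 disp32 form; the real emitter picks shorter disp0/disp8 forms
         and has special cases for no-base, no-index, RSP/R12 and RBP/R13. */
# include <stdint.h>
static unsigned iemExampleEncodeLea32(uint8_t *pbDst, uint8_t iRegDst, uint8_t iRegBase,
                                      uint8_t iRegIndex, uint8_t cShift, uint32_t u32Disp)
{
    unsigned off = 0;
    if (iRegDst >= 8 || iRegBase >= 8 || iRegIndex >= 8)    /* REX.R/X/B extend reg, index, base */
        pbDst[off++] = 0x40 | (iRegDst >= 8 ? 4 : 0) | (iRegIndex >= 8 ? 2 : 0) | (iRegBase >= 8 ? 1 : 0);
    pbDst[off++] = 0x8d;                                                    /* LEA opcode        */
    pbDst[off++] = (uint8_t)(0x80 | ((iRegDst & 7) << 3) | 4 /*SIB*/);      /* ModRM: mod=10     */
    pbDst[off++] = (uint8_t)((cShift << 6) | ((iRegIndex & 7) << 3) | (iRegBase & 7)); /* SIB    */
    pbDst[off++] = (uint8_t)u32Disp;                                        /* disp32, LE        */
    pbDst[off++] = (uint8_t)(u32Disp >>  8);
    pbDst[off++] = (uint8_t)(u32Disp >> 16);
    pbDst[off++] = (uint8_t)(u32Disp >> 24);
    return off;
}
#endif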
12523
12524#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12525 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12526 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12527
12528#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12529 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12530 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
12531
12532#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
12533 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
12534 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
12535
12536/**
12537 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
12538 *
12539 * @returns New off.
12540 * @param   pReNative           The native recompile state.
12541 * @param   off                 The code buffer offset.
12542 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
12543 * bit 4 to REX.X. The two bits are part of the
12544 * REG sub-field, which isn't needed in this
12545 * function.
12546 * @param uSibAndRspOffset Two parts:
12547 * - The first 8 bits make up the SIB byte.
12548 * - The next 8 bits are the fixed RSP/ESP offset
12549 * in case of a pop [xSP].
12550 * @param u32Disp The displacement byte/word/dword, if any.
12551 * @param cbInstr The size of the fully decoded instruction. Used
12552 * for RIP relative addressing.
12553 * @param idxVarRet The result variable number.
12554 * @param f64Bit Whether to use a 64-bit or 32-bit address size
12555 * when calculating the address.
12556 *
12557 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
12558 */
12559DECL_INLINE_THROW(uint32_t)
12560iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
12561 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
12562{
12563 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
12564
12565 /*
12566 * Special case the rip + disp32 form first.
12567 */
12568 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
12569 {
12570#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
12571        /* Need to take the current PC offset into account for the displacement; no need to flush here
12572         * as the PC is only accessed read-only and no branching or helper calls are involved. */
12573 u32Disp += pReNative->Core.offPc;
12574#endif
12575
12576 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12577 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
12578 kIemNativeGstRegUse_ReadOnly);
12579#ifdef RT_ARCH_AMD64
12580 if (f64Bit)
12581 {
12582 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
12583 if ((int32_t)offFinalDisp == offFinalDisp)
12584 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
12585 else
12586 {
12587 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
12588 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
12589 }
12590 }
12591 else
12592 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
12593
12594#elif defined(RT_ARCH_ARM64)
12595 if (f64Bit)
12596 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12597 (int64_t)(int32_t)u32Disp + cbInstr);
12598 else
12599 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
12600 (int32_t)u32Disp + cbInstr);
12601
12602#else
12603# error "Port me!"
12604#endif
12605 iemNativeRegFreeTmp(pReNative, idxRegPc);
12606 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12607 return off;
12608 }
12609
12610    /* Calculate the fixed displacement (more on this below under SIB.B=4 and SIB.B=5). */
12611 int64_t i64EffAddr = 0;
12612 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
12613 {
12614 case 0: break;
12615 case 1: i64EffAddr = (int8_t)u32Disp; break;
12616 case 2: i64EffAddr = (int32_t)u32Disp; break;
12617 default: AssertFailed();
12618 }
12619
12620 /* Get the register (or SIB) value. */
12621 uint8_t idxGstRegBase = UINT8_MAX;
12622 uint8_t idxGstRegIndex = UINT8_MAX;
12623 uint8_t cShiftIndex = 0;
12624 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
12625 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
12626 else /* SIB: */
12627 {
12628        /* index w/ scaling. */
12629 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
12630 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
12631 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
12632 if (idxGstRegIndex == 4)
12633 {
12634 /* no index */
12635 cShiftIndex = 0;
12636 idxGstRegIndex = UINT8_MAX;
12637 }
12638
12639 /* base */
12640 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
12641 if (idxGstRegBase == 4)
12642 {
12643 /* pop [rsp] hack */
12644 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
12645 }
12646 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
12647 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
12648 {
12649 /* mod=0 and base=5 -> disp32, no base reg. */
12650 Assert(i64EffAddr == 0);
12651 i64EffAddr = (int32_t)u32Disp;
12652 idxGstRegBase = UINT8_MAX;
12653 }
12654 }
12655
12656 /*
12657 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
12658 * the start of the function.
12659 */
12660 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
12661 {
12662 if (f64Bit)
12663 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
12664 else
12665 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
12666 return off;
12667 }
12668
12669 /*
12670 * Now emit code that calculates:
12671 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12672 * or if !f64Bit:
12673 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
12674 */
12675 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
12676 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
12677 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
12678 kIemNativeGstRegUse_ReadOnly);
12679 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
12680 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
12681 kIemNativeGstRegUse_ReadOnly);
12682
12683 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
12684 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
12685 {
12686 idxRegBase = idxRegIndex;
12687 idxRegIndex = UINT8_MAX;
12688 }
12689
12690#ifdef RT_ARCH_AMD64
12691 uint8_t bFinalAdj;
12692 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
12693 bFinalAdj = 0; /* likely */
12694 else
12695 {
12696 /* pop [rsp] with a problematic disp32 value. Split out the
12697 RSP offset and add it separately afterwards (bFinalAdj). */
12698 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
12699 Assert(idxGstRegBase == X86_GREG_xSP);
12700 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
12701 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
12702 Assert(bFinalAdj != 0);
12703 i64EffAddr -= bFinalAdj;
12704 Assert((int32_t)i64EffAddr == i64EffAddr);
12705 }
12706 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
12707//pReNative->pInstrBuf[off++] = 0xcc;
12708
12709 if (idxRegIndex == UINT8_MAX)
12710 {
12711 if (u32EffAddr == 0)
12712 {
12713 /* mov ret, base */
12714 if (f64Bit)
12715 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
12716 else
12717 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
12718 }
12719 else
12720 {
12721 /* lea ret, [base + disp32] */
12722 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
12723 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12724 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
12725 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12726 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12727 | (f64Bit ? X86_OP_REX_W : 0);
12728 pbCodeBuf[off++] = 0x8d;
12729 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12730 if (idxRegBase != X86_GREG_x12 /*SIB*/)
12731 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
12732 else
12733 {
12734 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12735 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
12736 }
12737 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12738 if (bMod == X86_MOD_MEM4)
12739 {
12740 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12741 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12742 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12743 }
12744 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12745 }
12746 }
12747 else
12748 {
12749 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
12750 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
12751 if (idxRegBase == UINT8_MAX)
12752 {
12753 /* lea ret, [(index64 << cShiftIndex) + disp32] */
12754 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
12755 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12756 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12757 | (f64Bit ? X86_OP_REX_W : 0);
12758 pbCodeBuf[off++] = 0x8d;
12759 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
12760 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
12761 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12762 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12763 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12764 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12765 }
12766 else
12767 {
12768 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
12769 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
12770 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
12771 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
12772 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
12773 | (f64Bit ? X86_OP_REX_W : 0);
12774 pbCodeBuf[off++] = 0x8d;
12775 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
12776 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
12777 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
12778 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
12779 if (bMod != X86_MOD_MEM0)
12780 {
12781 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
12782 if (bMod == X86_MOD_MEM4)
12783 {
12784 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
12785 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
12786 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
12787 }
12788 }
12789 }
12790 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
12791 }
12792
12793 if (!bFinalAdj)
12794 { /* likely */ }
12795 else
12796 {
12797 Assert(f64Bit);
12798 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
12799 }
12800
12801#elif defined(RT_ARCH_ARM64)
12802 if (i64EffAddr == 0)
12803 {
12804 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12805 if (idxRegIndex == UINT8_MAX)
12806 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
12807 else if (idxRegBase != UINT8_MAX)
12808 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
12809 f64Bit, false /*fSetFlags*/, cShiftIndex);
12810 else
12811 {
12812 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
12813 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
12814 }
12815 }
12816 else
12817 {
12818 if (f64Bit)
12819 { /* likely */ }
12820 else
12821 i64EffAddr = (int32_t)i64EffAddr;
12822
12823 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
12824 {
12825 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12826 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
12827 }
12828 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
12829 {
12830 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12831 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
12832 }
12833 else
12834 {
12835 if (f64Bit)
12836 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
12837 else
12838 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
12839 if (idxRegBase != UINT8_MAX)
12840 {
12841 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12842 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
12843 }
12844 }
12845 if (idxRegIndex != UINT8_MAX)
12846 {
12847 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
12848 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
12849 f64Bit, false /*fSetFlags*/, cShiftIndex);
12850 }
12851 }
12852
12853#else
12854# error "port me"
12855#endif
12856
12857 if (idxRegIndex != UINT8_MAX)
12858 iemNativeRegFreeTmp(pReNative, idxRegIndex);
12859 if (idxRegBase != UINT8_MAX)
12860 iemNativeRegFreeTmp(pReNative, idxRegBase);
12861 iemNativeVarRegisterRelease(pReNative, idxVarRet);
12862 return off;
12863}
12864
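#if 0 /* Standalone sketch (hypothetical helper) of the rip+disp32 special case handled first in
         the 64-bit emitter above: mod=00, rm=101 addresses relative to the *next* instruction,
         and a 0x67 address-size prefix (f64Bit == false) truncates the result to 32 bits. */
# include <stdint.h>
static uint64_t iemExampleCalcRipRelative(uint64_t uRipOfInstr, uint8_t cbInstr, int32_t i32Disp, int f64Bit)
{
    uint64_t const uEffAddr = uRipOfInstr + cbInstr + (int64_t)i32Disp;   /* rip of next instruction + disp32 */
    return f64Bit ? uEffAddr : (uint32_t)uEffAddr;
}
#endif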
12865
12866/*********************************************************************************************************************************
12867* TLB Lookup. *
12868*********************************************************************************************************************************/
12869
12870/**
12871 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
12872 */
12873DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint32_t uSegAndSizeAndAccess)
12874{
12875 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccess);
12876 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccess);
12877 uint32_t const fAccess = uSegAndSizeAndAccess >> 16;
12878 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64 LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, cbMem, fAccess, uResult));
12879
12880 /* Do the lookup manually. */
12881 RTGCPTR const GCPtrFlat = iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
12882 uint64_t const uTag = IEMTLB_CALC_TAG( &pVCpu->iem.s.DataTlb, GCPtrFlat);
12883 PIEMTLBENTRY const pTlbe = IEMTLB_TAG_TO_ENTRY(&pVCpu->iem.s.DataTlb, uTag);
12884 if (RT_LIKELY(pTlbe->uTag == uTag))
12885 {
12886 /*
12887 * Check TLB page table level access flags.
12888 */
12889 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
12890 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
12891 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
12892 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
12893 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
12894 | IEMTLBE_F_PG_UNASSIGNED
12895 | IEMTLBE_F_PT_NO_ACCESSED
12896 | fNoWriteNoDirty | fNoUser);
12897 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
12898 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
12899 {
12900 /*
12901 * Return the address.
12902 */
12903 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
12904 if ((uintptr_t)pbAddr == uResult)
12905 return;
12906 RT_NOREF(cbMem);
12907 AssertFailed();
12908 }
12909 else
12910 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
12911 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
12912 }
12913 else
12914 AssertFailed();
12915 RT_BREAKPOINT();
12916}
12917
12918/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
12919
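#if 0 /* Very rough, standalone sketch (hypothetical field names and table size) of the kind of
         direct-mapped lookup IEMTLB_CALC_TAG / IEMTLB_TAG_TO_ENTRY perform for the check above:
         the tag combines the guest page number with a revision that is bumped on TLB flushes. */
# include <stdint.h>
# include <stddef.h>
typedef struct EXAMPLETLBENTRY { uint64_t uTag; } EXAMPLETLBENTRY;
typedef struct EXAMPLETLB
{
    uint64_t        uTlbRevision;           /* ORed into every tag; changing it invalidates all entries */
    EXAMPLETLBENTRY aEntries[256];
} EXAMPLETLB;

static EXAMPLETLBENTRY *iemExampleTlbTagToEntry(EXAMPLETLB *pTlb, uint64_t GCPtrFlat)
{
    uint64_t const uTag = (GCPtrFlat >> 12) | pTlb->uTlbRevision;   /* guest page number + revision */
    return &pTlb->aEntries[(size_t)(uTag & 255)];                   /* direct-mapped index          */
}
#endif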
12920
12921/*********************************************************************************************************************************
12922* Memory fetches and stores common *
12923*********************************************************************************************************************************/
12924
12925typedef enum IEMNATIVEMITMEMOP
12926{
12927 kIemNativeEmitMemOp_Store = 0,
12928 kIemNativeEmitMemOp_Fetch,
12929 kIemNativeEmitMemOp_Fetch_Zx_U16,
12930 kIemNativeEmitMemOp_Fetch_Zx_U32,
12931 kIemNativeEmitMemOp_Fetch_Zx_U64,
12932 kIemNativeEmitMemOp_Fetch_Sx_U16,
12933 kIemNativeEmitMemOp_Fetch_Sx_U32,
12934 kIemNativeEmitMemOp_Fetch_Sx_U64
12935} IEMNATIVEMITMEMOP;
12936
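#if 0 /* Standalone sketch (hypothetical helper) of what the Fetch_Sx_* / Fetch_Zx_* operations in
         the enum above amount to semantically: load cbMem bytes and sign- or zero-extend to the
         destination width, which the emitters below do with the matching load instructions. */
# include <stdint.h>
static uint64_t iemExampleFetchSxU64(const void *pvSrc, uint8_t cbMem)
{
    switch (cbMem)
    {
        case 1:  return (uint64_t)(int64_t)*(const int8_t   *)pvSrc;
        case 2:  return (uint64_t)(int64_t)*(const int16_t  *)pvSrc;
        case 4:  return (uint64_t)(int64_t)*(const int32_t  *)pvSrc;
        default: return                    *(const uint64_t *)pvSrc;
    }
}
#endif
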
12937/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
12938 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
12939 * (with iSegReg = UINT8_MAX). */
12940DECL_INLINE_THROW(uint32_t)
12941iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
12942 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
12943 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
12944{
12945 /*
12946 * Assert sanity.
12947 */
12948 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
12949 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
12950 Assert( enmOp != kIemNativeEmitMemOp_Store
12951 || pVarValue->enmKind == kIemNativeVarKind_Immediate
12952 || pVarValue->enmKind == kIemNativeVarKind_Stack);
12953 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
12954 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
12955 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
12956 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
12957 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
12958 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
12959 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
12960 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
12961#ifdef VBOX_STRICT
12962 if (iSegReg == UINT8_MAX)
12963 {
12964 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
12965 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
12966 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
12967 switch (cbMem)
12968 {
12969 case 1:
12970 Assert( pfnFunction
12971 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
12972 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12973 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12974 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12975 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
12976 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
12977 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
12978 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
12979 : UINT64_C(0xc000b000a0009000) ));
12980 break;
12981 case 2:
12982 Assert( pfnFunction
12983 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
12984 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12985 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12986 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
12987 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
12988 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
12989 : UINT64_C(0xc000b000a0009000) ));
12990 break;
12991 case 4:
12992 Assert( pfnFunction
12993 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
12994 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
12995 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
12996 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
12997 : UINT64_C(0xc000b000a0009000) ));
12998 break;
12999 case 8:
13000 Assert( pfnFunction
13001 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
13002 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
13003 : UINT64_C(0xc000b000a0009000) ));
13004 break;
13005 }
13006 }
13007 else
13008 {
13009 Assert(iSegReg < 6);
13010 switch (cbMem)
13011 {
13012 case 1:
13013 Assert( pfnFunction
13014 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
13015 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
13016 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13017 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13018 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
13019 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
13020 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
13021 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
13022 : UINT64_C(0xc000b000a0009000) ));
13023 break;
13024 case 2:
13025 Assert( pfnFunction
13026 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
13027 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
13028 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
13029 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
13030 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
13031 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
13032 : UINT64_C(0xc000b000a0009000) ));
13033 break;
13034 case 4:
13035 Assert( pfnFunction
13036 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
13037 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
13038 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
13039 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
13040 : UINT64_C(0xc000b000a0009000) ));
13041 break;
13042 case 8:
13043 Assert( pfnFunction
13044 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
13045 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
13046 : UINT64_C(0xc000b000a0009000) ));
13047 break;
13048 }
13049 }
13050#endif
13051
13052#ifdef VBOX_STRICT
13053 /*
13054 * Check that the fExec flags we've got make sense.
13055 */
13056 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13057#endif
13058
13059 /*
13060 * To keep things simple we have to commit any pending writes first as we
13061 * may end up making calls.
13062 */
13063 /** @todo we could postpone this till we make the call and reload the
13064 * registers after returning from the call. Not sure if that's sensible or
13065 * not, though. */
13066#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13067 off = iemNativeRegFlushPendingWrites(pReNative, off);
13068#else
13069 /* The program counter is treated differently for now. */
13070 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
13071#endif
13072
13073#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13074 /*
13075 * Move/spill/flush stuff out of call-volatile registers.
13076 * This is the easy way out. We could contain this to the tlb-miss branch
13077 * by saving and restoring active stuff here.
13078 */
13079 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
13080#endif
13081
13082 /*
13083 * Define labels and allocate the result register (trying for the return
13084 * register if we can).
13085 */
13086 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13087 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
13088 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
13089 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
13090 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
13091 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
13092 uint8_t const idxRegValueStore = !TlbState.fSkip
13093 && enmOp == kIemNativeEmitMemOp_Store
13094 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13095 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off)
13096 : UINT8_MAX;
13097 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13098 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13099 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13100 : UINT32_MAX;
13101
13102 /*
13103 * Jump to the TLB lookup code.
13104 */
13105 if (!TlbState.fSkip)
13106 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13107
13108 /*
13109 * TlbMiss:
13110 *
13111 * Call helper to do the fetching.
13112 * We flush all guest register shadow copies here.
13113 */
13114 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
13115
13116#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13117 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13118#else
13119 RT_NOREF(idxInstr);
13120#endif
13121
13122#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13123 if (pReNative->Core.offPc)
13124 {
13125 /*
13126 * Update the program counter but restore it at the end of the TlbMiss branch.
13127         * This should allow delaying more program counter updates for the TlbLookup and hit paths,
13128         * which are hopefully much more frequent, reducing the number of memory accesses.
13129 */
13130 /* Allocate a temporary PC register. */
13131 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13132
13133 /* Perform the addition and store the result. */
13134 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13135 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13136
13137 /* Free and flush the PC register. */
13138 iemNativeRegFreeTmp(pReNative, idxPcReg);
13139 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13140 }
13141#endif
13142
13143#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13144 /* Save variables in volatile registers. */
13145 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13146 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
13147 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
13148 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13149#endif
13150
13151 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
13152 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
13153 if (enmOp == kIemNativeEmitMemOp_Store)
13154 {
13155 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
13156 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
13157#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13158 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13159#else
13160 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
13161 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
13162#endif
13163 }
13164
13165 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
13166 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
13167#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13168 fVolGregMask);
13169#else
13170 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
13171#endif
13172
13173 if (iSegReg != UINT8_MAX)
13174 {
13175 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
13176 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
13177 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
13178 }
13179
13180 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13181 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13182
13183 /* Done setting up parameters, make the call. */
13184 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13185
13186 /*
13187 * Put the result in the right register if this is a fetch.
13188 */
13189 if (enmOp != kIemNativeEmitMemOp_Store)
13190 {
13191 Assert(idxRegValueFetch == pVarValue->idxReg);
13192 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
13193 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
13194 }
13195
13196#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13197 /* Restore variables and guest shadow registers to volatile registers. */
13198 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13199 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13200#endif
13201
13202#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
13203 if (pReNative->Core.offPc)
13204 {
13205 /*
13206 * Time to restore the program counter to its original value.
13207 */
13208 /* Allocate a temporary PC register. */
13209 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
13210
13211 /* Restore the original value. */
13212 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
13213 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
13214
13215 /* Free and flush the PC register. */
13216 iemNativeRegFreeTmp(pReNative, idxPcReg);
13217 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
13218 }
13219#endif
13220
13221#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13222 if (!TlbState.fSkip)
13223 {
13224 /* end of TlbMiss - Jump to the done label. */
13225 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13226 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13227
13228 /*
13229 * TlbLookup:
13230 */
13231 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
13232 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
13233 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
13234
13235 /*
13236 * Emit code to do the actual storing / fetching.
13237 */
13238 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13239# ifdef VBOX_WITH_STATISTICS
13240 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13241 enmOp == kIemNativeEmitMemOp_Store
13242 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch)
13243 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore));
13244# endif
13245 switch (enmOp)
13246 {
13247 case kIemNativeEmitMemOp_Store:
13248 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
13249 {
13250 switch (cbMem)
13251 {
13252 case 1:
13253 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13254 break;
13255 case 2:
13256 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13257 break;
13258 case 4:
13259 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13260 break;
13261 case 8:
13262 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
13263 break;
13264 default:
13265 AssertFailed();
13266 }
13267 }
13268 else
13269 {
13270 switch (cbMem)
13271 {
13272 case 1:
13273 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
13274 idxRegMemResult, TlbState.idxReg1);
13275 break;
13276 case 2:
13277 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
13278 idxRegMemResult, TlbState.idxReg1);
13279 break;
13280 case 4:
13281 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
13282 idxRegMemResult, TlbState.idxReg1);
13283 break;
13284 case 8:
13285 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
13286 idxRegMemResult, TlbState.idxReg1);
13287 break;
13288 default:
13289 AssertFailed();
13290 }
13291 }
13292 break;
13293
13294 case kIemNativeEmitMemOp_Fetch:
13295 case kIemNativeEmitMemOp_Fetch_Zx_U16:
13296 case kIemNativeEmitMemOp_Fetch_Zx_U32:
13297 case kIemNativeEmitMemOp_Fetch_Zx_U64:
13298 switch (cbMem)
13299 {
13300 case 1:
13301 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13302 break;
13303 case 2:
13304 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13305 break;
13306 case 4:
13307 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13308 break;
13309 case 8:
13310 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13311 break;
13312 default:
13313 AssertFailed();
13314 }
13315 break;
13316
13317 case kIemNativeEmitMemOp_Fetch_Sx_U16:
13318 Assert(cbMem == 1);
13319 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13320 break;
13321
13322 case kIemNativeEmitMemOp_Fetch_Sx_U32:
13323 Assert(cbMem == 1 || cbMem == 2);
13324 if (cbMem == 1)
13325 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13326 else
13327 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13328 break;
13329
13330 case kIemNativeEmitMemOp_Fetch_Sx_U64:
13331 switch (cbMem)
13332 {
13333 case 1:
13334 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13335 break;
13336 case 2:
13337 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13338 break;
13339 case 4:
13340 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
13341 break;
13342 default:
13343 AssertFailed();
13344 }
13345 break;
13346
13347 default:
13348 AssertFailed();
13349 }
13350
13351 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
13352
13353 /*
13354 * TlbDone:
13355 */
13356 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
13357
13358 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
13359
13360# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
13361 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
13362 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13363# endif
13364 }
13365#else
13366 RT_NOREF(fAlignMask, idxLabelTlbMiss);
13367#endif
13368
13369 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
13370 iemNativeVarRegisterRelease(pReNative, idxVarValue);
13371 return off;
13372}
13373
13374
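#if 0 /* Standalone sketch (hypothetical names, stubbed slow path) of the control-flow skeleton
         iemNativeEmitMemFetchStoreDataCommon() lays down: an inline TLB lookup on the hot path
         (TlbLookup) with a direct host access on a hit, and the C helper call confined to the
         cold TlbMiss branch, both meeting again at TlbDone. */
# include <stdint.h>
typedef struct EXAMPLEVCPU { uint8_t *pbTlbMapping; uint64_t uTlbTag; } EXAMPLEVCPU;

static uint64_t iemExampleMemFetchHelperU64(EXAMPLEVCPU *pVCpu, uint64_t GCPtrMem)
{
    (void)pVCpu; (void)GCPtrMem;
    return 0;                               /* stand-in for iemNativeHlpMemFetchDataU64 & friends */
}

static uint64_t iemExampleFetchU64(EXAMPLEVCPU *pVCpu, uint64_t GCPtrMem)
{
    /* TlbLookup: does the cached page match and is it directly mapped? */
    if (pVCpu->pbTlbMapping && (GCPtrMem >> 12) == pVCpu->uTlbTag)
        return *(uint64_t const *)&pVCpu->pbTlbMapping[GCPtrMem & 0xfff];   /* hit: host read  */
    return iemExampleMemFetchHelperU64(pVCpu, GCPtrMem);                    /* TlbMiss: helper */
    /* TlbDone */
}
#endif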
13375
13376/*********************************************************************************************************************************
13377* Memory fetches (IEM_MEM_FETCH_XXX). *
13378*********************************************************************************************************************************/
13379
13380/* 8-bit segmented: */
13381#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
13382 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
13383 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13384 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13385
13386#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13387 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13388 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13389 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13390
13391#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13392 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13393 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13394 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13395
13396#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13397 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13398 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13399 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
13400
13401#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13402 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13403 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13404 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13405
13406#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13407 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13408 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13409 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13410
13411#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13412 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13413 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13414 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13415
13416/* 16-bit segmented: */
13417#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
13418 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13419 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13420 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13421
13422#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13423 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
13424 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13425 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13426
13427#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13428 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13429 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13430 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13431
13432#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13433 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13434 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13435 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
13436
13437#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13438 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13439 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13440 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13441
13442#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13443 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13444 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13445 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13446
13447
13448/* 32-bit segmented: */
13449#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
13450 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13451 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13452 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13453
13454#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
13455 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
13456 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13457 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13458
13459#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13460 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13461 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13462 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
13463
13464#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13465 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13466 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13467 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13468
13469
13470/* 64-bit segmented: */
13471#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
13472 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
13473 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13474 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
13475
13476
13477
13478/* 8-bit flat: */
13479#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
13480 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
13481 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
13482 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13483
13484#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
13485 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13486 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
13487 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13488
13489#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
13490 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13491 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13492 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13493
13494#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
13495 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13496 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13497 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
13498
13499#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
13500 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13501 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
13502 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
13503
13504#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
13505 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13506 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13507 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
13508
13509#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
13510 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13511 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13512 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
13513
13514
13515/* 16-bit flat: */
13516#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
13517 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13518 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13519 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13520
13521#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
13522 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
13523 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
13524 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
13525
13526#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
13527 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13528 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
13529 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13530
13531#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
13532 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13533 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13534 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
13535
13536#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
13537 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13538 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
13539 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
13540
13541#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
13542 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13543 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13544 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
13545
13546/* 32-bit flat: */
13547#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
13548 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13549 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13550 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13551
13552#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
13553 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
13554 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
13555 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
13556
13557#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
13558 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13559 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
13560 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
13561
13562#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
13563 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13564 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
13565 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
13566
13567/* 64-bit flat: */
13568#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
13569 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
13570 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
13571 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
13572
13573
13574
13575/*********************************************************************************************************************************
13576* Memory stores (IEM_MEM_STORE_XXX). *
13577*********************************************************************************************************************************/
13578
13579#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
13580 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
13581 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13582 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13583
13584#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
13585 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
13586 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13587 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13588
13589#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
13590 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
13591 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13592 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13593
13594#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
13595 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
13596 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13597 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13598
13599
13600#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
13601 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
13602 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
13603 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13604
13605#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
13606 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
13607 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
13608 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13609
13610#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
13611 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
13612 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
13613 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13614
13615#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
13616 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
13617 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
13618 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13619
13620
13621#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
13622 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
13623 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
13624
13625#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
13626 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
13627 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
13628
13629#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
13630 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
13631 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
13632
13633#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
13634 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
13635 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
13636
13637
13638#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
13639 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
13640 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
13641
13642#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
13643 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
13644 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
13645
13646#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
13647 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
13648 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
13649
13650#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
13651 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
13652 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
13653
13654/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
13655 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
13656DECL_INLINE_THROW(uint32_t)
13657iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
13658 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
13659{
13660 /*
13661 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
13662 * to do the grunt work.
13663 */
13664 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
13665 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
13666 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
13667 pfnFunction, idxInstr);
13668 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
13669 return off;
13670}
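
/*
 * Rough illustration of how the *_CONST store macros above travel through this helper:
 * IEM_MC_STORE_MEM_U16_CONST(iSeg, GCPtrMem, 0x1234) allocates a 2-byte constant variable
 * holding 0x1234, hands it to iemNativeEmitMemFetchStoreDataCommon as a store with
 * fAlignMask = 1 and iemNativeHlpMemStoreDataU16 as the TLB-miss helper, and then frees
 * the temporary constant variable again.
 */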
13671
13672
13673
13674/*********************************************************************************************************************************
13675* Stack Accesses. *
13676*********************************************************************************************************************************/
13677/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
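/*
 * The packed parameter is decoded by iemNativeEmitStackPush (and similarly by the pop
 * emitter further down) as:
 *      cbMem     = RT_BYTE1(cBitsVarAndFlat) / 8  - the operand size in bytes,
 *      cBitsFlat = RT_BYTE2(cBitsVarAndFlat)      - 0 for segmented stacks, 32 or 64 for flat ones,
 *      fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0 - set for the segment register push variants.
 * So, for example, RT_MAKE_U32_FROM_U8(32, 32, 1, 0) used by IEM_MC_FLAT32_PUSH_U32_SREG
 * describes a 4-byte segment register push on a flat 32-bit stack.
 */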
13678#define IEM_MC_PUSH_U16(a_u16Value) \
13679 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
13680 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
13681#define IEM_MC_PUSH_U32(a_u32Value) \
13682 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
13683 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
13684#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
13685 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
13686 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
13687#define IEM_MC_PUSH_U64(a_u64Value) \
13688 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
13689 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
13690
13691#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
13692 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
13693 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13694#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
13695 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
13696 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
13697#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
13698 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
13699 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
13700
13701#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
13702 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
13703 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
13704#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
13705 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
13706 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
13707
13708
13709DECL_FORCE_INLINE_THROW(uint32_t)
13710iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13711{
13712 /* Use16BitSp: */
13713#ifdef RT_ARCH_AMD64
13714 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
13715 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13716#else
13717 /* sub regeff, regrsp, #cbMem */
13718 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
13719 /* and regeff, regeff, #0xffff */
13720 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
13721 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
13722    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits as is. */
13723 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
13724#endif
13725 return off;
13726}
13727
13728
13729DECL_FORCE_INLINE(uint32_t)
13730iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
13731{
13732 /* Use32BitSp: */
13733 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
13734 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
13735 return off;
13736}
13737
13738
13739/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
13740DECL_INLINE_THROW(uint32_t)
13741iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
13742 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
13743{
13744 /*
13745 * Assert sanity.
13746 */
13747 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
13748 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
13749#ifdef VBOX_STRICT
13750 if (RT_BYTE2(cBitsVarAndFlat) != 0)
13751 {
13752 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
13753 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
13754 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
13755 Assert( pfnFunction
13756 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13757 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
13758 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
13759 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
13760 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
13761 : UINT64_C(0xc000b000a0009000) ));
13762 }
13763 else
13764 Assert( pfnFunction
13765 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
13766 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
13767 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
13768 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
13769 : UINT64_C(0xc000b000a0009000) ));
13770#endif
13771
13772#ifdef VBOX_STRICT
13773 /*
13774 * Check that the fExec flags we've got make sense.
13775 */
13776 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
13777#endif
13778
13779 /*
13780 * To keep things simple we have to commit any pending writes first as we
13781 * may end up making calls.
13782 */
13783 /** @todo we could postpone this till we make the call and reload the
13784 * registers after returning from the call. Not sure if that's sensible or
13785 * not, though. */
13786 off = iemNativeRegFlushPendingWrites(pReNative, off);
13787
13788 /*
13789 * First we calculate the new RSP and the effective stack pointer value.
13790 * For 64-bit mode and flat 32-bit these two are the same.
13791     * (Code structure is very similar to that of POP)
13792 */
13793 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
13794 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
13795 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
13796 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
13797 ? cbMem : sizeof(uint16_t);
13798 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
13799 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
13800 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
13801 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
13802 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
13803 if (cBitsFlat != 0)
13804 {
13805 Assert(idxRegEffSp == idxRegRsp);
13806 Assert(cBitsFlat == 32 || cBitsFlat == 64);
13807 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
13808 if (cBitsFlat == 64)
13809 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
13810 else
13811 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
13812 }
13813 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
13814 {
13815 Assert(idxRegEffSp != idxRegRsp);
13816 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
13817 kIemNativeGstRegUse_ReadOnly);
13818#ifdef RT_ARCH_AMD64
13819 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13820#else
13821 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13822#endif
13823 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
13824 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
13825 offFixupJumpToUseOtherBitSp = off;
13826 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13827 {
13828 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
13829 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13830 }
13831 else
13832 {
13833 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
13834 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13835 }
13836 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13837 }
13838 /* SpUpdateEnd: */
13839 uint32_t const offLabelSpUpdateEnd = off;
13840
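    /*
     * Sketch of the code emitted from here on (the pop emitter further down follows the
     * same pattern, loading instead of storing):
     *      jmp     TlbLookup               ; or TlbMiss when the lookup is skipped
     *  Use16BitSp:                         ; 16-bit SP adjustment, then jmp SpUpdateEnd
     *  TlbMiss:
     *      save volatiles, call pfnFunction, restore, jmp TlbDone
     *  TlbLookup:                          ; only with IEMNATIVE_WITH_TLB_LOOKUP
     *      store the value via the TLB-resolved host address
     *  TlbDone:
     *      commit the updated RSP
     */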
13841 /*
13842 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
13843 * we're skipping lookup).
13844 */
13845 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
13846 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
13847 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
13848 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
13849 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
13850 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
13851 : UINT32_MAX;
13852 uint8_t const idxRegValue = !TlbState.fSkip
13853 && pVarValue->enmKind != kIemNativeVarKind_Immediate
13854 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
13855 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
13856 : UINT8_MAX;
13857 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
13858
13859
13860 if (!TlbState.fSkip)
13861 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
13862 else
13863 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
13864
13865 /*
13866 * Use16BitSp:
13867 */
13868 if (cBitsFlat == 0)
13869 {
13870#ifdef RT_ARCH_AMD64
13871 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
13872#else
13873 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
13874#endif
13875 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
13876 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
13877 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13878 else
13879 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
13880 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
13881 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
13882 }
13883
13884 /*
13885 * TlbMiss:
13886 *
13887 * Call helper to do the pushing.
13888 */
13889 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
13890
13891#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
13892 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
13893#else
13894 RT_NOREF(idxInstr);
13895#endif
13896
13897 /* Save variables in volatile registers. */
13898 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
13899 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
13900 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
13901 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
13902 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
13903
13904 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
13905 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
13906 {
13907 /* Swap them using ARG0 as temp register: */
13908 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
13909 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
13910 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
13911 }
13912 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
13913 {
13914 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
13915 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
13916 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
13917
13918 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
13919 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
13920 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13921 }
13922 else
13923 {
13924 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
13925 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
13926
13927 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
13928 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
13929 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
13930 }
13931
13932 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
13933 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
13934
13935 /* Done setting up parameters, make the call. */
13936 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
13937
13938 /* Restore variables and guest shadow registers to volatile registers. */
13939 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
13940 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
13941
13942#ifdef IEMNATIVE_WITH_TLB_LOOKUP
13943 if (!TlbState.fSkip)
13944 {
13945 /* end of TlbMiss - Jump to the done label. */
13946 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
13947 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
13948
13949 /*
13950 * TlbLookup:
13951 */
13952 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
13953 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
13954
13955 /*
13956 * Emit code to do the actual storing / fetching.
13957 */
13958 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
13959# ifdef VBOX_WITH_STATISTICS
13960 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
13961 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
13962# endif
13963 if (idxRegValue != UINT8_MAX)
13964 {
13965 switch (cbMemAccess)
13966 {
13967 case 2:
13968 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13969 break;
13970 case 4:
13971 if (!fIsIntelSeg)
13972 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
13973 else
13974 {
13975                        /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
13976                           PUSH FS in real mode, so we have to try to emulate that here.
13977 We borrow the now unused idxReg1 from the TLB lookup code here. */
13978 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
13979 kIemNativeGstReg_EFlags);
13980 if (idxRegEfl != UINT8_MAX)
13981 {
13982#ifdef RT_ARCH_AMD64
13983 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
13984 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
13985 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13986#else
13987 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
13988 off, TlbState.idxReg1, idxRegEfl,
13989 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13990#endif
13991 iemNativeRegFreeTmp(pReNative, idxRegEfl);
13992 }
13993 else
13994 {
13995 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
13996 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
13997 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
13998 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
13999 }
14000 /* ASSUMES the upper half of idxRegValue is ZERO. */
14001 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
14002 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
14003 }
14004 break;
14005 case 8:
14006 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
14007 break;
14008 default:
14009 AssertFailed();
14010 }
14011 }
14012 else
14013 {
14014 switch (cbMemAccess)
14015 {
14016 case 2:
14017 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
14018 idxRegMemResult, TlbState.idxReg1);
14019 break;
14020 case 4:
14021 Assert(!fIsSegReg);
14022 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
14023 idxRegMemResult, TlbState.idxReg1);
14024 break;
14025 case 8:
14026 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
14027 break;
14028 default:
14029 AssertFailed();
14030 }
14031 }
14032
14033 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14034 TlbState.freeRegsAndReleaseVars(pReNative);
14035
14036 /*
14037 * TlbDone:
14038 *
14039 * Commit the new RSP value.
14040 */
14041 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14042 }
14043#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14044
14045 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
14046 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14047 if (idxRegEffSp != idxRegRsp)
14048 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14049
14050    /* The value variable is implicitly flushed. */
14051 if (idxRegValue != UINT8_MAX)
14052 iemNativeVarRegisterRelease(pReNative, idxVarValue);
14053 iemNativeVarFreeLocal(pReNative, idxVarValue);
14054
14055 return off;
14056}
14057
14058
14059
14060/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
14061#define IEM_MC_POP_GREG_U16(a_iGReg) \
14062 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
14063 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
14064#define IEM_MC_POP_GREG_U32(a_iGReg) \
14065 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
14066 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
14067#define IEM_MC_POP_GREG_U64(a_iGReg) \
14068 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
14069 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
14070
14071#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
14072 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
14073 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14074#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
14075 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
14076 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
14077
14078#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
14079 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
14080 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
14081#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
14082 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
14083 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
14084
14085
14086DECL_FORCE_INLINE_THROW(uint32_t)
14087iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
14088 uint8_t idxRegTmp)
14089{
14090 /* Use16BitSp: */
14091#ifdef RT_ARCH_AMD64
14092 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14093 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
14094 RT_NOREF(idxRegTmp);
14095#else
14096 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
14097 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
14098 /* add tmp, regrsp, #cbMem */
14099 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
14100 /* and tmp, tmp, #0xffff */
14101 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
14102 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
14103    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
14104 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
14105#endif
14106 return off;
14107}
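
/*
 * Net effect of the 16-bit SP pop helper above: idxRegEffSp ends up holding the 16-bit
 * stack pointer value to read from, while bits 15:0 of idxRegRsp are advanced by cbMem
 * (with 16-bit wrap-around) and the upper RSP bits are left untouched.
 */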
14108
14109
14110DECL_FORCE_INLINE(uint32_t)
14111iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
14112{
14113 /* Use32BitSp: */
14114 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
14115 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
14116 return off;
14117}
14118
14119
14120/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
14121DECL_INLINE_THROW(uint32_t)
14122iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
14123 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
14124{
14125 /*
14126 * Assert sanity.
14127 */
14128 Assert(idxGReg < 16);
14129#ifdef VBOX_STRICT
14130 if (RT_BYTE2(cBitsVarAndFlat) != 0)
14131 {
14132 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14133 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14134 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14135 Assert( pfnFunction
14136 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14137 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
14138 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
14139 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
14140 : UINT64_C(0xc000b000a0009000) ));
14141 }
14142 else
14143 Assert( pfnFunction
14144 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
14145 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
14146 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
14147 : UINT64_C(0xc000b000a0009000) ));
14148#endif
14149
14150#ifdef VBOX_STRICT
14151 /*
14152 * Check that the fExec flags we've got make sense.
14153 */
14154 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14155#endif
14156
14157 /*
14158 * To keep things simple we have to commit any pending writes first as we
14159 * may end up making calls.
14160 */
14161 off = iemNativeRegFlushPendingWrites(pReNative, off);
14162
14163 /*
14164     * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
14165 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
14166 * directly as the effective stack pointer.
14167 * (Code structure is very similar to that of PUSH)
14168 */
14169 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
14170 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
14171 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
14172 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
14173 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
14174 /** @todo can do a better job picking the register here. For cbMem >= 4 this
14175 * will be the resulting register value. */
14176 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
14177
14178 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
14179 if (cBitsFlat != 0)
14180 {
14181 Assert(idxRegEffSp == idxRegRsp);
14182 Assert(cBitsFlat == 32 || cBitsFlat == 64);
14183 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
14184 }
14185 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
14186 {
14187 Assert(idxRegEffSp != idxRegRsp);
14188 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
14189 kIemNativeGstRegUse_ReadOnly);
14190#ifdef RT_ARCH_AMD64
14191 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14192#else
14193 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14194#endif
14195 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
14196 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
14197 offFixupJumpToUseOtherBitSp = off;
14198 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14199 {
14200/** @todo can skip idxRegRsp updating when popping ESP. */
14201 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
14202 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14203 }
14204 else
14205 {
14206 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
14207 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14208 }
14209 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14210 }
14211 /* SpUpdateEnd: */
14212 uint32_t const offLabelSpUpdateEnd = off;
14213
14214 /*
14215 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
14216 * we're skipping lookup).
14217 */
14218 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
14219 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
14220 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14221 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
14222 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14223 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14224 : UINT32_MAX;
14225
14226 if (!TlbState.fSkip)
14227 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14228 else
14229 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
14230
14231 /*
14232 * Use16BitSp:
14233 */
14234 if (cBitsFlat == 0)
14235 {
14236#ifdef RT_ARCH_AMD64
14237 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14238#else
14239 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
14240#endif
14241 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
14242 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
14243 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
14244 else
14245 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
14246 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
14247 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14248 }
14249
14250 /*
14251 * TlbMiss:
14252 *
14253     * Call helper to do the popping.
14254 */
14255 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
14256
14257#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14258 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14259#else
14260 RT_NOREF(idxInstr);
14261#endif
14262
14263 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
14264 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
14265 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
14266 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14267
14268
14269 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
14270 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
14271 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
14272
14273 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14274 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14275
14276 /* Done setting up parameters, make the call. */
14277 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14278
14279 /* Move the return register content to idxRegMemResult. */
14280 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14281 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14282
14283 /* Restore variables and guest shadow registers to volatile registers. */
14284 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14285 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14286
14287#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14288 if (!TlbState.fSkip)
14289 {
14290 /* end of TlbMiss - Jump to the done label. */
14291 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14292 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14293
14294 /*
14295 * TlbLookup:
14296 */
14297 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
14298 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14299
14300 /*
14301         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult itself).
14302 */
14303 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
14304# ifdef VBOX_WITH_STATISTICS
14305 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
14306 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
14307# endif
14308 switch (cbMem)
14309 {
14310 case 2:
14311 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14312 break;
14313 case 4:
14314 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14315 break;
14316 case 8:
14317 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
14318 break;
14319 default:
14320 AssertFailed();
14321 }
14322
14323 TlbState.freeRegsAndReleaseVars(pReNative);
14324
14325 /*
14326 * TlbDone:
14327 *
14328         * Set the new RSP value (FLAT accesses need to calculate it first) and
14329 * commit the popped register value.
14330 */
14331 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14332 }
14333#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
14334
14335 if (idxGReg != X86_GREG_xSP)
14336 {
14337 /* Set the register. */
14338 if (cbMem >= sizeof(uint32_t))
14339 {
14340#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
14341 AssertMsg( pReNative->idxCurCall == 0
14342 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
14343 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
14344#endif
14345 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
14346 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
14347 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14348 }
14349 else
14350 {
14351 Assert(cbMem == sizeof(uint16_t));
14352 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
14353 kIemNativeGstRegUse_ForUpdate);
14354 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
14355 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
14356 iemNativeRegFreeTmp(pReNative, idxRegDst);
14357 }
14358
14359 /* Complete RSP calculation for FLAT mode. */
14360 if (idxRegEffSp == idxRegRsp)
14361 {
14362 if (cBitsFlat == 64)
14363 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14364 else
14365 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14366 }
14367 }
14368 else
14369 {
14370        /* We're popping RSP, ESP or SP. Only the latter (SP) needs a bit of extra work, of course. */
14371 if (cbMem == sizeof(uint64_t))
14372 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
14373 else if (cbMem == sizeof(uint32_t))
14374 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
14375 else
14376 {
14377 if (idxRegEffSp == idxRegRsp)
14378 {
14379 if (cBitsFlat == 64)
14380 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
14381 else
14382 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
14383 }
14384 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
14385 }
14386 }
14387 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
14388
14389 iemNativeRegFreeTmp(pReNative, idxRegRsp);
14390 if (idxRegEffSp != idxRegRsp)
14391 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
14392 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
14393
14394 return off;
14395}
14396
14397
14398
14399/*********************************************************************************************************************************
14400* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
14401*********************************************************************************************************************************/
14402
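/*
 * The mapping macros below all forward to iemNativeEmitMemMapCommon, passing the element
 * size, the access mode (ATOMIC, RW, WO or RO) and an alignment mask (generally size - 1,
 * 0 for byte accesses); the FLAT variants pass UINT8_MAX instead of a segment register.
 * Rough sketch of one expansion:
 *
 *      IEM_MC_MEM_MAP_U32_RW(pu32Mem, bUnmapInfo, X86_SREG_ES, GCPtrMem)
 *          => off = iemNativeEmitMemMapCommon(pReNative, off, pu32Mem, bUnmapInfo, X86_SREG_ES, GCPtrMem,
 *                                             sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1,
 *                                             (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr);
 */
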
14403#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14404 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14405 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14406 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
14407
14408#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14409 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14410 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14411 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
14412
14413#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14414 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14415 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14416                                    (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr)
14417
14418#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14419 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
14420 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14421 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
14422
14423
14424#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14425 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14426 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14427 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
14428
14429#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14430 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14431 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14432 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
14433
14434#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14435 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14436 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14437                                    (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr)
14438
14439#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14440 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
14441 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14442 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
14443
14444#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14445 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
14446 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14447                                    (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr)
14448
14449
14450#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14451 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14452 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14453 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
14454
14455#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14456 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14457 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14458 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
14459
14460#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14461 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14462 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14463                                    (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr)
14464
14465#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14466 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
14467 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14468 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
14469
14470#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14471 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
14472 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14473                                    (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr)
14474
14475
14476#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14477 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14478 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14479 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
14480
14481#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14482 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14483 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14484 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
14485#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14486 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14487 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14488                                    (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr)
14489
14490#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14491 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
14492 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14493 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
14494
14495#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14496 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
14497 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14498                                    (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr)
14499
14500
14501#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14502 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14503 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14504                                    (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr)
14505
14506#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14507 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
14508 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14509                                    (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr)
14510
14511
14512#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14513 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14514 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14515 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
14516
14517#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14518 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14519 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14520 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
14521
14522#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14523 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14524 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14525                                    (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr)
14526
14527#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
14528 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
14529 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14530 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
14531
14532
14533
14534#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14535 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14536 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
14537 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
14538
14539#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14540 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14541 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
14542 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
14543
14544#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14545 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14546 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
14547 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
14548
14549#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
14550 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
14551 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
14552 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
14553
14554
14555#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14556 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14557 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14558 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
14559
14560#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14561 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14562 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14563 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
14564
14565#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14566 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14567 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14568 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14569
14570#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
14571 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
14572 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14573 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
14574
14575#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
14576 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
14577 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
14578 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
14579
14580
14581#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14582 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14583 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14584 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
14585
14586#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14587 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14588 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14589 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
14590
14591#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14592 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14593 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14594 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14595
14596#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
14597 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
14598 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14599 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
14600
14601#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
14602 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
14603 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
14604 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
14605
14606
14607#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14608 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14609 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14610 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
14611
14612#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14613 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14614 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14615 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
14616
14617#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14618 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14619 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14620 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14621
14622#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
14623 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
14624 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14625 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
14626
14627#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
14628 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
14629 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14630 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
14631
14632
14633#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
14634 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14635 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
14636 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
14637
14638#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
14639 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
14640 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
14641 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
14642
14643
14644#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14645 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14646 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14647 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
14648
14649#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14650 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14651 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14652 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
14653
14654#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14655 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14656 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14657 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
14658
14659#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
14660 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
14661 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
14662 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
14663
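/*
 * Illustrative note (a sketch, not something the recompiler itself uses): the MC
 * blocks in the instruction implementations pair one of the IEM_MC_MEM_MAP_XXX /
 * IEM_MC_MEM_FLAT_MAP_XXX statements above with the matching
 * IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statement defined further down, roughly:
 *
 *      IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
 *      ... read and update *pu16Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 *
 * The variable names here are placeholders only; the real MC blocks declare them
 * via the usual IEM_MC_LOCAL / IEM_MC_ARG machinery.
 */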
14664
14665DECL_INLINE_THROW(uint32_t)
14666iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
14667 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
14668 uintptr_t pfnFunction, uint8_t idxInstr)
14669{
14670 /*
14671 * Assert sanity.
14672 */
14673 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
14674 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
14675 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
14676 && pVarMem->cbVar == sizeof(void *),
14677 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14678
14679 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14680 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14681 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
14682 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
14683 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14684
14685 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
14686 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
14687 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
14688 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
14689 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
14690
14691 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
14692
14693 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
14694
14695#ifdef VBOX_STRICT
14696# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
14697 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
14698 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
14699 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
14700 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
14701# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
14702 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
14703 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
14704 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
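    /* For instance, IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW, iemNativeHlpMemFlatMapDataU32)
       resolves to (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, i.e. the helper that
       the IEM_MC_MEM_FLAT_MAP_U32_RW macro above passes in as pfnFunction. */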
14705
14706 if (iSegReg == UINT8_MAX)
14707 {
14708 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
14709 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
14710 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
14711 switch (cbMem)
14712 {
14713 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
14714 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
14715 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
14716 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
14717 case 10:
14718 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
14719 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
14720 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14721 break;
14722 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
14723# if 0
14724 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
14725 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
14726# endif
14727 default: AssertFailed(); break;
14728 }
14729 }
14730 else
14731 {
14732 Assert(iSegReg < 6);
14733 switch (cbMem)
14734 {
14735 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
14736 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
14737 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
14738 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
14739 case 10:
14740 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
14741 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
14742 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
14743 break;
14744 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
14745# if 0
14746 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
14747 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
14748# endif
14749 default: AssertFailed(); break;
14750 }
14751 }
14752# undef IEM_MAP_HLP_FN
14753# undef IEM_MAP_HLP_FN_NO_AT
14754#endif
14755
14756#ifdef VBOX_STRICT
14757 /*
14758 * Check that the fExec flags we've got make sense.
14759 */
14760 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
14761#endif
14762
14763 /*
14764 * To keep things simple we have to commit any pending writes first as we
14765 * may end up making calls.
14766 */
14767 off = iemNativeRegFlushPendingWrites(pReNative, off);
14768
14769#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14770 /*
14771 * Move/spill/flush stuff out of call-volatile registers.
14772 * This is the easy way out. We could contain this to the tlb-miss branch
14773 * by saving and restoring active stuff here.
14774 */
14775 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
14776 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
14777#endif
14778
14779 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
14780       while the tlb-miss code path will temporarily put it on the stack.
14781       Set the type to stack here so we don't need to do it twice below. */
14782 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
14783 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
14784 /** @todo use a tmp register from TlbState, since they'll be free after tlb
14785 * lookup is done. */
14786
14787 /*
14788 * Define labels and allocate the result register (trying for the return
14789 * register if we can).
14790 */
14791 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
14792 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
14793 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
14794 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
14795 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
14796 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
14797 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
14798 : UINT32_MAX;
14799//off=iemNativeEmitBrk(pReNative, off, 0);
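    /*
     * Rough shape of what gets emitted below (a sketch only; the actual
     * instructions depend on the host architecture, the build configuration
     * and on TlbState.fSkip forcing everything through the TlbMiss path):
     *
     *          jmp     TlbLookup_N
     *      TlbMiss_N:
     *          ; save/spill whatever the helper call may clobber
     *          arg2 = GCPtrMem, arg3 = iSegReg (if any), arg1 = &bUnmapInfo, arg0 = pVCpu
     *          call    pfnFunction             ; returns the host address of the mapping
     *          idxRegMemResult = return value, idxRegUnmapInfo = bUnmapInfo from the stack
     *          jmp     TlbDone_N
     *      TlbLookup_N:
     *          ; inline TLB lookup, branching to TlbMiss_N on a miss
     *          idxRegUnmapInfo = 0             ; direct mapping, nothing to unmap
     *      TlbDone_N:
     */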
14800 /*
14801 * Jump to the TLB lookup code.
14802 */
14803 if (!TlbState.fSkip)
14804 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
14805
14806 /*
14807 * TlbMiss:
14808 *
14809     * Call helper to do the mapping.
14810 * We flush all guest register shadow copies here.
14811 */
14812 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
14813
14814#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
14815 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
14816#else
14817 RT_NOREF(idxInstr);
14818#endif
14819
14820#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14821 /* Save variables in volatile registers. */
14822 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
14823 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
14824#endif
14825
14826 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
14827 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
14828#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14829 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
14830#else
14831 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14832#endif
14833
14834 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
14835 if (iSegReg != UINT8_MAX)
14836 {
14837 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
14838 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
14839 }
14840
14841 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
14842 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
14843 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
14844
14845 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
14846 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
14847
14848 /* Done setting up parameters, make the call. */
14849 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
14850
14851 /*
14852 * Put the output in the right registers.
14853 */
14854 Assert(idxRegMemResult == pVarMem->idxReg);
14855 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
14856 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
14857
14858#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14859 /* Restore variables and guest shadow registers to volatile registers. */
14860 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
14861 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
14862#endif
14863
14864 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
14865 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
14866
14867#ifdef IEMNATIVE_WITH_TLB_LOOKUP
14868 if (!TlbState.fSkip)
14869 {
14870        /* end of tlb miss - Jump to the done label. */
14871 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
14872 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
14873
14874 /*
14875 * TlbLookup:
14876 */
14877 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
14878 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
14879# ifdef VBOX_WITH_STATISTICS
14880 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
14881 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
14882# endif
14883
14884 /* [idxVarUnmapInfo] = 0; */
14885 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
14886
14887 /*
14888 * TlbDone:
14889 */
14890 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
14891
14892 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
14893
14894# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
14895 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
14896 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
14897# endif
14898 }
14899#else
14900 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
14901#endif
14902
14903 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
14904 iemNativeVarRegisterRelease(pReNative, idxVarMem);
14905
14906 return off;
14907}
14908
14909
14910#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
14911 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
14912 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
14913
14914#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
14915 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
14916 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
14917
14918#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
14919 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
14920 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
14921
14922#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
14923 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
14924 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
14925
14926DECL_INLINE_THROW(uint32_t)
14927iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
14928 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
14929{
14930 /*
14931 * Assert sanity.
14932 */
14933 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
14934#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
14935 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
14936#endif
14937 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
14938 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
14939 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
14940#ifdef VBOX_STRICT
14941 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
14942 {
14943 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
14944 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
14945 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
14946 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
14947 case IEM_ACCESS_TYPE_WRITE:
14948 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
14949 case IEM_ACCESS_TYPE_READ:
14950 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
14951 default: AssertFailed();
14952 }
14953#else
14954 RT_NOREF(fAccess);
14955#endif
14956
14957 /*
14958 * To keep things simple we have to commit any pending writes first as we
14959 * may end up making calls (there shouldn't be any at this point, so this
14960 * is just for consistency).
14961 */
14962 /** @todo we could postpone this till we make the call and reload the
14963 * registers after returning from the call. Not sure if that's sensible or
14964 * not, though. */
14965 off = iemNativeRegFlushPendingWrites(pReNative, off);
14966
14967 /*
14968 * Move/spill/flush stuff out of call-volatile registers.
14969 *
14970 * We exclude any register holding the bUnmapInfo variable, as we'll be
14971 * checking it after returning from the call and will free it afterwards.
14972 */
14973 /** @todo save+restore active registers and maybe guest shadows in miss
14974 * scenario. */
14975 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */, RT_BIT_32(idxVarUnmapInfo));
14976
14977 /*
14978 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
14979 * to call the unmap helper function.
14980 *
14981     * The likelihood of it being zero is higher than for the TLB hit when doing
14982     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
14983 * access should also end up with a mapping that won't need special unmapping.
14984 */
14985 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
14986 * should speed up things for the pure interpreter as well when TLBs
14987 * are enabled. */
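    /*
     * Sketch of what gets emitted below (illustrative; on AMD64 the stack slot
     * can be tested directly, other hosts load bUnmapInfo into a register first):
     *
     *          test    bUnmapInfo, 0ffh
     *          jz      .done                   ; fixed up at the very end
     *          arg1 = bUnmapInfo, arg0 = pVCpu
     *          call    pfnFunction             ; iemNativeHlpMemCommitAndUnmapXxx
     *      .done:
     */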
14988#ifdef RT_ARCH_AMD64
14989 if (pVarUnmapInfo->idxReg == UINT8_MAX)
14990 {
14991 /* test byte [rbp - xxx], 0ffh */
14992 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
14993 pbCodeBuf[off++] = 0xf6;
14994 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
14995 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
14996 pbCodeBuf[off++] = 0xff;
14997 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
14998 }
14999 else
15000#endif
15001 {
15002 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
15003 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
15004 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
15005 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
15006 }
15007 uint32_t const offJmpFixup = off;
15008    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
15009
15010 /*
15011 * Call the unmap helper function.
15012 */
15013#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
15014 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
15015#else
15016 RT_NOREF(idxInstr);
15017#endif
15018
15019 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
15020 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
15021 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
15022
15023 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
15024 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
15025
15026 /* Done setting up parameters, make the call. */
15027 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
15028
15029    /* The bUnmapInfo variable is implicitly freed by these MCs. */
15030 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
15031
15032 /*
15033 * Done, just fixup the jump for the non-call case.
15034 */
15035 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
15036
15037 return off;
15038}
15039
15040
15041
15042/*********************************************************************************************************************************
15043* State and Exceptions *
15044*********************************************************************************************************************************/
15045
15046#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15047#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15048
15049#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15050#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15051#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15052
15053#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15054#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
15055#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
15056
15057
15058DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
15059{
15060 /** @todo this needs a lot more work later. */
15061 RT_NOREF(pReNative, fForChange);
15062 return off;
15063}
15064
15065
15066
15067/*********************************************************************************************************************************
15068* Emitters for FPU related operations. *
15069*********************************************************************************************************************************/
15070
15071#define IEM_MC_FETCH_FCW(a_u16Fcw) \
15072 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
15073
15074/** Emits code for IEM_MC_FETCH_FCW. */
15075DECL_INLINE_THROW(uint32_t)
15076iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15077{
15078 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15079 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15080
15081 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15082
15083 /* Allocate a temporary FCW register. */
15084 /** @todo eliminate extra register */
15085 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
15086 kIemNativeGstRegUse_ReadOnly);
15087
15088 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
15089
15090 /* Free but don't flush the FCW register. */
15091 iemNativeRegFreeTmp(pReNative, idxFcwReg);
15092 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15093
15094 return off;
15095}
15096
15097
15098#define IEM_MC_FETCH_FSW(a_u16Fsw) \
15099 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
15100
15101/** Emits code for IEM_MC_FETCH_FSW. */
15102DECL_INLINE_THROW(uint32_t)
15103iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
15104{
15105 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15106 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
15107
15108 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
15109 /* Allocate a temporary FSW register. */
15110 /** @todo eliminate extra register */
15111 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
15112 kIemNativeGstRegUse_ReadOnly);
15113
15114 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
15115
15116 /* Free but don't flush the FSW register. */
15117 iemNativeRegFreeTmp(pReNative, idxFswReg);
15118 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15119
15120 return off;
15121}
15122
15123
15124
15125#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15126
15127
15128/*********************************************************************************************************************************
15129* Emitters for SSE/AVX specific operations. *
15130*********************************************************************************************************************************/
15131
15132#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
15133 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
15134
15135/** Emits code for IEM_MC_COPY_XREG_U128. */
15136DECL_INLINE_THROW(uint32_t)
15137iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
15138{
15139 /* Allocate destination and source register. */
15140 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
15141 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
15142 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
15143 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15144
15145 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
15146 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, iXRegDst);
15147 /* We don't need to write everything back here as the destination is marked as dirty and will be flushed automatically. */
15148
15149 /* Free but don't flush the source and destination register. */
15150 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
15151 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15152
15153 return off;
15154}
15155
15156
15157#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
15158 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
15159
15160/** Emits code for IEM_MC_FETCH_XREG_U64. */
15161DECL_INLINE_THROW(uint32_t)
15162iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
15163{
15164 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15165 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
15166
15167 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
15168 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15169
15170 iemNativeVarSetKindToStack(pReNative, idxDstVar);
15171 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15172
15173 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
15174
15175 /* Free but don't flush the source register. */
15176 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15177 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15178
15179 return off;
15180}
15181
15182
15183#define IEM_MC_FETCH_XREG_U32(a_u64Value, a_iXReg, a_iDWord) \
15184 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u64Value, a_iXReg, a_iDWord)
15185
15186/** Emits code for IEM_MC_FETCH_XREG_U32. */
15187DECL_INLINE_THROW(uint32_t)
15188iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
15189{
15190 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
15191 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
15192
15193 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
15194 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
15195
15196 iemNativeVarSetKindToStack(pReNative, idxDstVar);
15197 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
15198
15199 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
15200
15201 /* Free but don't flush the source register. */
15202 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
15203 iemNativeVarRegisterRelease(pReNative, idxDstVar);
15204
15205 return off;
15206}
15207
15208
15209#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
15210 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
15211
15212/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
15213DECL_INLINE_THROW(uint32_t)
15214iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
15215{
15216 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
15217 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
15218
15219 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
15220 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, iYReg);
15221
15222 /* Free but don't flush the register. */
15223 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
15224
15225 return off;
15226}
15227
15228#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
15229
15230
15231/*********************************************************************************************************************************
15232* The native code generator functions for each MC block. *
15233*********************************************************************************************************************************/
15234
15235/*
15236 * Include instruction emitters.
15237 */
15238#include "target-x86/IEMAllN8veEmit-x86.h"
15239
15240/*
15241 * Include g_apfnIemNativeRecompileFunctions and associated functions.
15242 *
15243 * This should probably live in its own file later, but let's see what the
15244 * compile times turn out to be first.
15245 */
15246#include "IEMNativeFunctions.cpp.h"
15247
15248
15249
15250/*********************************************************************************************************************************
15251* Recompiler Core. *
15252*********************************************************************************************************************************/
15253
15254
15255/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
15256static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
15257{
15258 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
15259 pDis->cbCachedInstr += cbMaxRead;
15260 RT_NOREF(cbMinRead);
15261 return VERR_NO_DATA;
15262}
15263
15264
15265DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
15266{
15267 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
15268 {
15269#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
15270 ENTRY(fLocalForcedActions),
15271 ENTRY(iem.s.rcPassUp),
15272 ENTRY(iem.s.fExec),
15273 ENTRY(iem.s.pbInstrBuf),
15274 ENTRY(iem.s.uInstrBufPc),
15275 ENTRY(iem.s.GCPhysInstrBuf),
15276 ENTRY(iem.s.cbInstrBufTotal),
15277 ENTRY(iem.s.idxTbCurInstr),
15278#ifdef VBOX_WITH_STATISTICS
15279 ENTRY(iem.s.StatNativeTlbHitsForFetch),
15280 ENTRY(iem.s.StatNativeTlbHitsForStore),
15281 ENTRY(iem.s.StatNativeTlbHitsForStack),
15282 ENTRY(iem.s.StatNativeTlbHitsForMapped),
15283 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
15284 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
15285 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
15286 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
15287#endif
15288 ENTRY(iem.s.DataTlb.aEntries),
15289 ENTRY(iem.s.DataTlb.uTlbRevision),
15290 ENTRY(iem.s.DataTlb.uTlbPhysRev),
15291 ENTRY(iem.s.DataTlb.cTlbHits),
15292 ENTRY(iem.s.CodeTlb.aEntries),
15293 ENTRY(iem.s.CodeTlb.uTlbRevision),
15294 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
15295 ENTRY(iem.s.CodeTlb.cTlbHits),
15296 ENTRY(pVMR3),
15297 ENTRY(cpum.GstCtx.rax),
15298 ENTRY(cpum.GstCtx.ah),
15299 ENTRY(cpum.GstCtx.rcx),
15300 ENTRY(cpum.GstCtx.ch),
15301 ENTRY(cpum.GstCtx.rdx),
15302 ENTRY(cpum.GstCtx.dh),
15303 ENTRY(cpum.GstCtx.rbx),
15304 ENTRY(cpum.GstCtx.bh),
15305 ENTRY(cpum.GstCtx.rsp),
15306 ENTRY(cpum.GstCtx.rbp),
15307 ENTRY(cpum.GstCtx.rsi),
15308 ENTRY(cpum.GstCtx.rdi),
15309 ENTRY(cpum.GstCtx.r8),
15310 ENTRY(cpum.GstCtx.r9),
15311 ENTRY(cpum.GstCtx.r10),
15312 ENTRY(cpum.GstCtx.r11),
15313 ENTRY(cpum.GstCtx.r12),
15314 ENTRY(cpum.GstCtx.r13),
15315 ENTRY(cpum.GstCtx.r14),
15316 ENTRY(cpum.GstCtx.r15),
15317 ENTRY(cpum.GstCtx.es.Sel),
15318 ENTRY(cpum.GstCtx.es.u64Base),
15319 ENTRY(cpum.GstCtx.es.u32Limit),
15320 ENTRY(cpum.GstCtx.es.Attr),
15321 ENTRY(cpum.GstCtx.cs.Sel),
15322 ENTRY(cpum.GstCtx.cs.u64Base),
15323 ENTRY(cpum.GstCtx.cs.u32Limit),
15324 ENTRY(cpum.GstCtx.cs.Attr),
15325 ENTRY(cpum.GstCtx.ss.Sel),
15326 ENTRY(cpum.GstCtx.ss.u64Base),
15327 ENTRY(cpum.GstCtx.ss.u32Limit),
15328 ENTRY(cpum.GstCtx.ss.Attr),
15329 ENTRY(cpum.GstCtx.ds.Sel),
15330 ENTRY(cpum.GstCtx.ds.u64Base),
15331 ENTRY(cpum.GstCtx.ds.u32Limit),
15332 ENTRY(cpum.GstCtx.ds.Attr),
15333 ENTRY(cpum.GstCtx.fs.Sel),
15334 ENTRY(cpum.GstCtx.fs.u64Base),
15335 ENTRY(cpum.GstCtx.fs.u32Limit),
15336 ENTRY(cpum.GstCtx.fs.Attr),
15337 ENTRY(cpum.GstCtx.gs.Sel),
15338 ENTRY(cpum.GstCtx.gs.u64Base),
15339 ENTRY(cpum.GstCtx.gs.u32Limit),
15340 ENTRY(cpum.GstCtx.gs.Attr),
15341 ENTRY(cpum.GstCtx.rip),
15342 ENTRY(cpum.GstCtx.eflags),
15343 ENTRY(cpum.GstCtx.uRipInhibitInt),
15344#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15345 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
15346 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
15347 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
15348 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
15349 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
15350 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
15351 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
15352 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
15353 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
15354 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
15355 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
15356 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
15357 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
15358 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
15359 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
15360 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
15361 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
15362 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
15363 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
15364 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
15365 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
15366 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
15367 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
15368 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
15369 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
15370 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
15371 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
15372 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
15373 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
15374 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
15375 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
15376 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
15377#endif
15378#undef ENTRY
15379 };
15380#ifdef VBOX_STRICT
15381 static bool s_fOrderChecked = false;
15382 if (!s_fOrderChecked)
15383 {
15384 s_fOrderChecked = true;
15385 uint32_t offPrev = s_aMembers[0].off;
15386 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
15387 {
15388 Assert(s_aMembers[i].off > offPrev);
15389 offPrev = s_aMembers[i].off;
15390 }
15391 }
15392#endif
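    /* Illustrative use: off = RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip) hits the
       "cpum.GstCtx.rip" entry via the binary search below, while offsets that
       match no table entry return NULL (or the threaded function stats name
       when statistics are compiled in). */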
15393
15394 /*
15395 * Binary lookup.
15396 */
15397 unsigned iStart = 0;
15398 unsigned iEnd = RT_ELEMENTS(s_aMembers);
15399 for (;;)
15400 {
15401 unsigned const iCur = iStart + (iEnd - iStart) / 2;
15402 uint32_t const offCur = s_aMembers[iCur].off;
15403 if (off < offCur)
15404 {
15405 if (iCur != iStart)
15406 iEnd = iCur;
15407 else
15408 break;
15409 }
15410 else if (off > offCur)
15411 {
15412 if (iCur + 1 < iEnd)
15413 iStart = iCur + 1;
15414 else
15415 break;
15416 }
15417 else
15418 return s_aMembers[iCur].pszName;
15419 }
15420#ifdef VBOX_WITH_STATISTICS
15421 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
15422 return "iem.s.acThreadedFuncStats[iFn]";
15423#endif
15424 return NULL;
15425}
15426
15427
15428/**
15429 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
15430 * @returns pszBuf.
15431 * @param fFlags The flags.
15432 * @param pszBuf The output buffer.
15433 * @param cbBuf The output buffer size. At least 32 bytes.
15434 */
15435DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
15436{
15437 Assert(cbBuf >= 32);
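    /* Example (illustrative): a 64-bit ring-3 native TB formats to something
       like "64BIT CPL3 TYPE_NATIVE" - the mode name, the CPL and then the
       names of whatever other flags are set. */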
15438 static RTSTRTUPLE const s_aModes[] =
15439 {
15440 /* [00] = */ { RT_STR_TUPLE("16BIT") },
15441 /* [01] = */ { RT_STR_TUPLE("32BIT") },
15442 /* [02] = */ { RT_STR_TUPLE("!2!") },
15443 /* [03] = */ { RT_STR_TUPLE("!3!") },
15444 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
15445 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
15446 /* [06] = */ { RT_STR_TUPLE("!6!") },
15447 /* [07] = */ { RT_STR_TUPLE("!7!") },
15448 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
15449 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
15450 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
15451 /* [0b] = */ { RT_STR_TUPLE("!b!") },
15452 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
15453 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
15454 /* [0e] = */ { RT_STR_TUPLE("!e!") },
15455 /* [0f] = */ { RT_STR_TUPLE("!f!") },
15456 /* [10] = */ { RT_STR_TUPLE("!10!") },
15457 /* [11] = */ { RT_STR_TUPLE("!11!") },
15458 /* [12] = */ { RT_STR_TUPLE("!12!") },
15459 /* [13] = */ { RT_STR_TUPLE("!13!") },
15460 /* [14] = */ { RT_STR_TUPLE("!14!") },
15461 /* [15] = */ { RT_STR_TUPLE("!15!") },
15462 /* [16] = */ { RT_STR_TUPLE("!16!") },
15463 /* [17] = */ { RT_STR_TUPLE("!17!") },
15464 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
15465 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
15466 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
15467 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
15468 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
15469 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
15470 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
15471 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
15472 };
15473 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
15474 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
15475 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
15476
15477 pszBuf[off++] = ' ';
15478 pszBuf[off++] = 'C';
15479 pszBuf[off++] = 'P';
15480 pszBuf[off++] = 'L';
15481 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
15482 Assert(off < 32);
15483
15484 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
15485
15486 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
15487 {
15488 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
15489 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
15490 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
15491 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
15492 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
15493 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
15494 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
15495 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
15496 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
15497 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
15498 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
15499 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
15500 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
15501 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
15502 };
15503 if (fFlags)
15504 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
15505 if (s_aFlags[i].fFlag & fFlags)
15506 {
15507 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
15508 pszBuf[off++] = ' ';
15509 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
15510 off += s_aFlags[i].cchName;
15511 fFlags &= ~s_aFlags[i].fFlag;
15512 if (!fFlags)
15513 break;
15514 }
15515 pszBuf[off] = '\0';
15516
15517 return pszBuf;
15518}
15519
15520
15521DECLHIDDEN(void) iemNativeDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
15522{
15523 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
15524#if defined(RT_ARCH_AMD64)
15525 static const char * const a_apszMarkers[] =
15526 {
15527 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
15528 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
15529 };
15530#endif
15531
15532 char szDisBuf[512];
15533 DISSTATE Dis;
15534 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
15535 uint32_t const cNative = pTb->Native.cInstructions;
15536 uint32_t offNative = 0;
15537#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15538 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
15539#endif
15540 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15541 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15542 : DISCPUMODE_64BIT;
15543#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15544 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
15545#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15546 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
15547#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
15548# error "Port me"
15549#else
15550 csh hDisasm = ~(size_t)0;
15551# if defined(RT_ARCH_AMD64)
15552 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
15553# elif defined(RT_ARCH_ARM64)
15554 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
15555# else
15556# error "Port me"
15557# endif
15558 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
15559
15560 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
15561 //Assert(rcCs == CS_ERR_OK);
15562#endif
15563
15564 /*
15565 * Print TB info.
15566 */
15567 pHlp->pfnPrintf(pHlp,
15568 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u\n"
15569 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
15570 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
15571 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
15572#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
15573 if (pDbgInfo && pDbgInfo->cEntries > 1)
15574 {
15575 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
15576
15577 /*
15578 * This disassembly is driven by the debug info which follows the native
15579         * code and indicates where the next guest instruction starts,
15580         * where labels are and such things.
15581 */
15582 uint32_t idxThreadedCall = 0;
15583 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
15584 uint8_t idxRange = UINT8_MAX;
15585 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
15586 uint32_t offRange = 0;
15587 uint32_t offOpcodes = 0;
15588 uint32_t const cbOpcodes = pTb->cbOpcodes;
15589 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
15590 uint32_t const cDbgEntries = pDbgInfo->cEntries;
15591 uint32_t iDbgEntry = 1;
15592 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
15593
15594 while (offNative < cNative)
15595 {
15596 /* If we're at or have passed the point where the next chunk of debug
15597 info starts, process it. */
15598 if (offDbgNativeNext <= offNative)
15599 {
15600 offDbgNativeNext = UINT32_MAX;
15601 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
15602 {
15603 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
15604 {
15605 case kIemTbDbgEntryType_GuestInstruction:
15606 {
15607 /* Did the exec flag change? */
15608 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
15609 {
15610 pHlp->pfnPrintf(pHlp,
15611 " fExec change %#08x -> %#08x %s\n",
15612 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15613 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
15614 szDisBuf, sizeof(szDisBuf)));
15615 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
15616 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
15617 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
15618 : DISCPUMODE_64BIT;
15619 }
15620
15621                            /* New opcode range? We need to fend off a spurious debug info entry here for cases
15622 where the compilation was aborted before the opcode was recorded and the actual
15623 instruction was translated to a threaded call. This may happen when we run out
15624 of ranges, or when some complicated interrupts/FFs are found to be pending or
15625 similar. So, we just deal with it here rather than in the compiler code as it
15626 is a lot simpler to do here. */
15627 if ( idxRange == UINT8_MAX
15628 || idxRange >= cRanges
15629 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
15630 {
15631 idxRange += 1;
15632 if (idxRange < cRanges)
15633 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
15634 else
15635 continue;
15636 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
15637 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
15638 + (pTb->aRanges[idxRange].idxPhysPage == 0
15639 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
15640 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
15641 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
15642 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
15643 pTb->aRanges[idxRange].idxPhysPage);
15644 GCPhysPc += offRange;
15645 }
15646
15647 /* Disassemble the instruction. */
15648 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
15649 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
15650 uint32_t cbInstr = 1;
15651 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
15652 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
15653 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
15654 if (RT_SUCCESS(rc))
15655 {
15656 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15657 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15658 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15659 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15660
15661 static unsigned const s_offMarker = 55;
15662 static char const s_szMarker[] = " ; <--- guest";
15663 if (cch < s_offMarker)
15664 {
15665 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
15666 cch = s_offMarker;
15667 }
15668 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
15669 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
15670
15671 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
15672 }
15673 else
15674 {
15675 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
15676 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
15677 cbInstr = 1;
15678 }
15679 GCPhysPc += cbInstr;
15680 offOpcodes += cbInstr;
15681 offRange += cbInstr;
15682 continue;
15683 }
15684
15685 case kIemTbDbgEntryType_ThreadedCall:
15686 pHlp->pfnPrintf(pHlp,
15687 " Call #%u to %s (%u args) - %s\n",
15688 idxThreadedCall,
15689 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15690 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
15691 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
15692 idxThreadedCall++;
15693 continue;
15694
15695 case kIemTbDbgEntryType_GuestRegShadowing:
15696 {
15697 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15698 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
15699 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
15700 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
15701 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15702 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
15703 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s\n", pszGstReg,
15704 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
15705 else
15706 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
15707 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
15708 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
15709 continue;
15710 }
15711
15712#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
15713 case kIemTbDbgEntryType_GuestSimdRegShadowing:
15714 {
15715 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
15716 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
15717 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
15718 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
15719 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15720 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
15721 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
15722 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
15723 else
15724 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
15725 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
15726 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
15727 continue;
15728 }
15729#endif
15730
15731 case kIemTbDbgEntryType_Label:
15732 {
15733 const char *pszName = "what_the_fudge";
15734 const char *pszComment = "";
15735 bool fNumbered = pDbgInfo->aEntries[iDbgEntry].Label.uData != 0;
15736 switch ((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel)
15737 {
15738 case kIemNativeLabelType_Return:
15739 pszName = "Return";
15740 break;
15741 case kIemNativeLabelType_ReturnBreak:
15742 pszName = "ReturnBreak";
15743 break;
15744 case kIemNativeLabelType_ReturnWithFlags:
15745 pszName = "ReturnWithFlags";
15746 break;
15747 case kIemNativeLabelType_NonZeroRetOrPassUp:
15748 pszName = "NonZeroRetOrPassUp";
15749 break;
15750 case kIemNativeLabelType_RaiseGp0:
15751 pszName = "RaiseGp0";
15752 break;
15753 case kIemNativeLabelType_RaiseNm:
15754 pszName = "RaiseNm";
15755 break;
15756 case kIemNativeLabelType_RaiseUd:
15757 pszName = "RaiseUd";
15758 break;
15759 case kIemNativeLabelType_RaiseMf:
15760 pszName = "RaiseMf";
15761 break;
15762 case kIemNativeLabelType_RaiseXf:
15763 pszName = "RaiseXf";
15764 break;
15765 case kIemNativeLabelType_ObsoleteTb:
15766 pszName = "ObsoleteTb";
15767 break;
15768 case kIemNativeLabelType_NeedCsLimChecking:
15769 pszName = "NeedCsLimChecking";
15770 break;
15771 case kIemNativeLabelType_CheckBranchMiss:
15772 pszName = "CheckBranchMiss";
15773 break;
15774 case kIemNativeLabelType_If:
15775 pszName = "If";
15776 fNumbered = true;
15777 break;
15778 case kIemNativeLabelType_Else:
15779 pszName = "Else";
15780 fNumbered = true;
15781 pszComment = " ; regs state restored pre-if-block";
15782 break;
15783 case kIemNativeLabelType_Endif:
15784 pszName = "Endif";
15785 fNumbered = true;
15786 break;
15787 case kIemNativeLabelType_CheckIrq:
15788 pszName = "CheckIrq_CheckVM";
15789 fNumbered = true;
15790 break;
15791 case kIemNativeLabelType_TlbLookup:
15792 pszName = "TlbLookup";
15793 fNumbered = true;
15794 break;
15795 case kIemNativeLabelType_TlbMiss:
15796 pszName = "TlbMiss";
15797 fNumbered = true;
15798 break;
15799 case kIemNativeLabelType_TlbDone:
15800 pszName = "TlbDone";
15801 fNumbered = true;
15802 break;
15803 case kIemNativeLabelType_Invalid:
15804 case kIemNativeLabelType_End:
15805 break;
15806 }
15807 if (fNumbered)
15808 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
15809 else
15810 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
15811 continue;
15812 }
15813
15814 case kIemTbDbgEntryType_NativeOffset:
15815 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
15816 Assert(offDbgNativeNext > offNative);
15817 break;
15818
15819#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
15820 case kIemTbDbgEntryType_DelayedPcUpdate:
15821 pHlp->pfnPrintf(pHlp,
15822 " Updating guest PC value by %u (cInstrSkipped=%u)\n",
15823 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
15824 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
15825 continue;
15826#endif
15827
15828 default:
15829 AssertFailed();
15830 }
15831 iDbgEntry++;
15832 break;
15833 }
15834 }
15835
15836 /*
15837 * Disassemble the next native instruction.
15838 */
15839 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
15840# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
15841 uint32_t cbInstr = sizeof(paNative[0]);
15842 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
15843 if (RT_SUCCESS(rc))
15844 {
15845# if defined(RT_ARCH_AMD64)
15846 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
15847 {
15848 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
15849 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
15850 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
15851 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
15852 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
15853 uInfo & 0x8000 ? "recompiled" : "todo");
15854 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
15855 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
15856 else
15857 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
15858 }
15859 else
15860# endif
15861 {
15862 const char *pszAnnotation = NULL;
15863# ifdef RT_ARCH_AMD64
15864 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
15865 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
15866 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15867 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15868 PCDISOPPARAM pMemOp;
15869 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param1.fUse))
15870 pMemOp = &Dis.Param1;
15871 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param2.fUse))
15872 pMemOp = &Dis.Param2;
15873 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.Param3.fUse))
15874 pMemOp = &Dis.Param3;
15875 else
15876 pMemOp = NULL;
15877 if ( pMemOp
15878 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
15879 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
15880 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
15881 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
15882
15883#elif defined(RT_ARCH_ARM64)
15884 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
15885 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
15886 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
15887# else
15888# error "Port me"
15889# endif
15890 if (pszAnnotation)
15891 {
15892 static unsigned const s_offAnnotation = 55;
15893 size_t const cchAnnotation = strlen(pszAnnotation);
15894 size_t cchDis = strlen(szDisBuf);
15895 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
15896 {
15897 if (cchDis < s_offAnnotation)
15898 {
15899 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
15900 cchDis = s_offAnnotation;
15901 }
15902 szDisBuf[cchDis++] = ' ';
15903 szDisBuf[cchDis++] = ';';
15904 szDisBuf[cchDis++] = ' ';
15905 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
15906 }
15907 }
15908 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
15909 }
15910 }
15911 else
15912 {
15913# if defined(RT_ARCH_AMD64)
15914 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
15915 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
15916# elif defined(RT_ARCH_ARM64)
15917 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
15918# else
15919# error "Port me"
15920# endif
15921 cbInstr = sizeof(paNative[0]);
15922 }
15923 offNative += cbInstr / sizeof(paNative[0]);
15924
15925# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15926 cs_insn *pInstr;
15927 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
15928 (uintptr_t)pNativeCur, 1, &pInstr);
15929 if (cInstrs > 0)
15930 {
15931 Assert(cInstrs == 1);
15932 const char *pszAnnotation = NULL;
15933# if defined(RT_ARCH_ARM64)
15934 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
15935 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
15936 {
15937                     /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
15938 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
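                    /* Illustrative note (hypothetical operand text, not from the original source):
                       capstone renders the memory operand roughly as "w9, [x28, #0x5a8]", so finding
                       "[x28" / "[x27" below identifies accesses relative to the fixed pVCpu / pCpumCtx
                       registers, and the '#' displacement that follows is resolved to a structure
                       field name via iemNativeDbgVCpuOffsetToName(). */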
15939 char *psz = strchr(pInstr->op_str, '[');
15940 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
15941 {
15942                         uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
15943 int32_t off = -1;
15944 psz += 4;
15945 if (*psz == ']')
15946 off = 0;
15947 else if (*psz == ',')
15948 {
15949 psz = RTStrStripL(psz + 1);
15950 if (*psz == '#')
15951 off = RTStrToInt32(&psz[1]);
15952 /** @todo deal with index registers and LSL as well... */
15953 }
15954 if (off >= 0)
15955 pszAnnotation = iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
15956 }
15957 }
15958# endif
15959
15960 size_t const cchOp = strlen(pInstr->op_str);
15961# if defined(RT_ARCH_AMD64)
15962 if (pszAnnotation)
15963 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
15964 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
15965 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15966 else
15967 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
15968 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
15969
15970# else
15971 if (pszAnnotation)
15972 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
15973 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
15974 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
15975 else
15976 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
15977 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
15978# endif
15979 offNative += pInstr->size / sizeof(*pNativeCur);
15980 cs_free(pInstr, cInstrs);
15981 }
15982 else
15983 {
15984# if defined(RT_ARCH_AMD64)
15985 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
15986                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
15987# else
15988 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
15989# endif
15990 offNative++;
15991 }
15992# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
15993 }
15994 }
15995 else
15996#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
15997 {
15998 /*
15999 * No debug info, just disassemble the x86 code and then the native code.
16000 *
16001 * First the guest code:
16002 */
16003 for (unsigned i = 0; i < pTb->cRanges; i++)
16004 {
16005 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
16006 + (pTb->aRanges[i].idxPhysPage == 0
16007 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
16008 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
16009 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
16010 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
16011 unsigned off = pTb->aRanges[i].offOpcodes;
16012 /** @todo this ain't working when crossing pages! */
16013 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
16014 while (off < cbOpcodes)
16015 {
16016 uint32_t cbInstr = 1;
16017 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
16018 &pTb->pabOpcodes[off], cbOpcodes - off,
16019 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
16020 if (RT_SUCCESS(rc))
16021 {
16022 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
16023 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
16024 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16025 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16026 pHlp->pfnPrintf(pHlp, " %RGp: %s\n", GCPhysPc, szDisBuf);
16027 GCPhysPc += cbInstr;
16028 off += cbInstr;
16029 }
16030 else
16031 {
16032 pHlp->pfnPrintf(pHlp, " %RGp: %.*Rhxs - disassembly failure %Rrc\n",
16033 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
16034 break;
16035 }
16036 }
16037 }
16038
16039 /*
16040 * Then the native code:
16041 */
16042 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
16043 while (offNative < cNative)
16044 {
16045 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
16046# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
16047 uint32_t cbInstr = sizeof(paNative[0]);
16048 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
16049 if (RT_SUCCESS(rc))
16050 {
16051# if defined(RT_ARCH_AMD64)
16052 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
16053 {
16054 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
16055 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
16056 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
16057 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
16058 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
16059 uInfo & 0x8000 ? "recompiled" : "todo");
16060 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
16061 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
16062 else
16063 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
16064 }
16065 else
16066# endif
16067 {
16068# ifdef RT_ARCH_AMD64
16069 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
16070 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
16071 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16072 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16073# elif defined(RT_ARCH_ARM64)
16074 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
16075 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
16076 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
16077# else
16078# error "Port me"
16079# endif
16080 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
16081 }
16082 }
16083 else
16084 {
16085# if defined(RT_ARCH_AMD64)
16086 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
16087 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
16088# else
16089 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
16090# endif
16091 cbInstr = sizeof(paNative[0]);
16092 }
16093 offNative += cbInstr / sizeof(paNative[0]);
16094
16095# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16096 cs_insn *pInstr;
16097 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
16098 (uintptr_t)pNativeCur, 1, &pInstr);
16099 if (cInstrs > 0)
16100 {
16101 Assert(cInstrs == 1);
16102# if defined(RT_ARCH_AMD64)
16103 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
16104 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
16105# else
16106 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
16107 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
16108# endif
16109 offNative += pInstr->size / sizeof(*pNativeCur);
16110 cs_free(pInstr, cInstrs);
16111 }
16112 else
16113 {
16114# if defined(RT_ARCH_AMD64)
16115 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
16116                             pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
16117# else
16118 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
16119# endif
16120 offNative++;
16121 }
16122# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
16123 }
16124 }
16125
16126#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
16127 /* Cleanup. */
16128 cs_close(&hDisasm);
16129#endif
16130}
16131
16132
16133/**
16134 * Recompiles the given threaded TB into a native one.
16135 *
16136 * In case of failure the translation block will be returned as-is.
16137 *
16138 * @returns pTb.
16139 * @param pVCpu The cross context virtual CPU structure of the calling
16140 * thread.
16141 * @param   pTb         The threaded translation block to recompile to native.
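 *
 * Minimal usage sketch (illustrative only; the exact caller in the threaded
 * recompiler is not reproduced here):
 * @code
 *      pTb = iemNativeRecompile(pVCpu, pTb);
 *      if ((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
 *      {   // Recompilation succeeded; pTb now carries native instructions.
 *      }
 *      else
 *      {   // Recompilation failed; the threaded TB was returned unmodified.
 *      }
 * @endcode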
16142 */
16143DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
16144{
16145 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
16146
16147 /*
16148      * The first time thru, we allocate the recompiler state; the other times
16149      * we just need to reset it before using it again.
16150 */
16151 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
16152 if (RT_LIKELY(pReNative))
16153 iemNativeReInit(pReNative, pTb);
16154 else
16155 {
16156 pReNative = iemNativeInit(pVCpu, pTb);
16157 AssertReturn(pReNative, pTb);
16158 }
16159
16160#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16161 /*
16162 * First do liveness analysis. This is done backwards.
16163 */
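    /* Conceptually, the backward pass below computes
           paLivenessEntries[i - 1] = liveness(call[i], paLivenessEntries[i])
       for i = cCalls-1 .. 1, starting from a final entry initialised as all-unused,
       so that paLivenessEntries[i] roughly describes what the calls following call i
       still need from the guest state by the time call i is recompiled. */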
16164 {
16165 uint32_t idxCall = pTb->Thrd.cCalls;
16166 if (idxCall <= pReNative->cLivenessEntriesAlloc)
16167 { /* likely */ }
16168 else
16169 {
16170 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
16171 while (idxCall > cAlloc)
16172 cAlloc *= 2;
16173 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
16174 AssertReturn(pvNew, pTb);
16175 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
16176 pReNative->cLivenessEntriesAlloc = cAlloc;
16177 }
16178 AssertReturn(idxCall > 0, pTb);
16179 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
16180
16181 /* The initial (final) entry. */
16182 idxCall--;
16183 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
16184
16185 /* Loop backwards thru the calls and fill in the other entries. */
16186 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
16187 while (idxCall > 0)
16188 {
16189 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
16190 if (pfnLiveness)
16191 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
16192 else
16193 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
16194 pCallEntry--;
16195 idxCall--;
16196 }
16197
16198# ifdef VBOX_WITH_STATISTICS
16199     /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
16200        to 'clobbered' rather than 'input'. */
16201 /** @todo */
16202# endif
16203 }
16204#endif
16205
16206 /*
16207 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
16208 * for aborting if an error happens.
16209 */
16210 uint32_t cCallsLeft = pTb->Thrd.cCalls;
16211#ifdef LOG_ENABLED
16212 uint32_t const cCallsOrg = cCallsLeft;
16213#endif
16214 uint32_t off = 0;
16215 int rc = VINF_SUCCESS;
16216 IEMNATIVE_TRY_SETJMP(pReNative, rc)
16217 {
16218 /*
16219 * Emit prolog code (fixed).
16220 */
16221 off = iemNativeEmitProlog(pReNative, off);
16222
16223 /*
16224 * Convert the calls to native code.
16225 */
16226#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16227 int32_t iGstInstr = -1;
16228#endif
16229#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
16230 uint32_t cThreadedCalls = 0;
16231 uint32_t cRecompiledCalls = 0;
16232#endif
16233#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16234 uint32_t idxCurCall = 0;
16235#endif
16236 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
16237 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
16238 while (cCallsLeft-- > 0)
16239 {
16240 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
16241#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
16242 pReNative->idxCurCall = idxCurCall;
16243#endif
16244
16245 /*
16246 * Debug info, assembly markup and statistics.
16247 */
16248#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
16249 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
16250 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
16251#endif
16252#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16253 iemNativeDbgInfoAddNativeOffset(pReNative, off);
16254 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
16255 {
16256 if (iGstInstr < (int32_t)pTb->cInstructions)
16257 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
16258 else
16259 Assert(iGstInstr == pTb->cInstructions);
16260 iGstInstr = pCallEntry->idxInstr;
16261 }
16262 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
16263#endif
16264#if defined(VBOX_STRICT)
16265 off = iemNativeEmitMarker(pReNative, off,
16266 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
16267#endif
16268#if defined(VBOX_STRICT)
16269 iemNativeRegAssertSanity(pReNative);
16270#endif
16271#ifdef VBOX_WITH_STATISTICS
16272 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
16273#endif
16274
16275 /*
16276 * Actual work.
16277 */
16278 Log2(("%u[%u]: %s%s\n", idxCurCall, pCallEntry->idxInstr, g_apszIemThreadedFunctions[pCallEntry->enmFunction],
16279 pfnRecom ? "(recompiled)" : "(todo)"));
16280 if (pfnRecom) /** @todo stats on this. */
16281 {
16282 off = pfnRecom(pReNative, off, pCallEntry);
16283 STAM_REL_STATS({cRecompiledCalls++;});
16284 }
16285 else
16286 {
16287 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
16288 STAM_REL_STATS({cThreadedCalls++;});
16289 }
16290 Assert(off <= pReNative->cInstrBufAlloc);
16291 Assert(pReNative->cCondDepth == 0);
16292
16293#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
16294 if (LogIs2Enabled())
16295 {
16296 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
16297# ifndef IEMLIVENESS_EXTENDED_LAYOUT
16298 static const char s_achState[] = "CUXI";
16299# else
16300 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
16301# endif
16302
16303 char szGpr[17];
16304 for (unsigned i = 0; i < 16; i++)
16305 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
16306 szGpr[16] = '\0';
16307
16308 char szSegBase[X86_SREG_COUNT + 1];
16309 char szSegLimit[X86_SREG_COUNT + 1];
16310 char szSegAttrib[X86_SREG_COUNT + 1];
16311 char szSegSel[X86_SREG_COUNT + 1];
16312 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
16313 {
16314 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
16315 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
16316 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
16317 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
16318 }
16319 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
16320 = szSegSel[X86_SREG_COUNT] = '\0';
16321
16322 char szEFlags[8];
16323 for (unsigned i = 0; i < 7; i++)
16324 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
16325 szEFlags[7] = '\0';
16326
16327                 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
16328 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
16329 }
16330#endif
16331
16332 /*
16333 * Advance.
16334 */
16335 pCallEntry++;
16336#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_STRICT) || defined(LOG_ENABLED)
16337 idxCurCall++;
16338#endif
16339 }
16340
16341 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
16342 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
16343 if (!cThreadedCalls)
16344 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
16345
16346 /*
16347 * Emit the epilog code.
16348 */
16349 uint32_t idxReturnLabel;
16350 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
16351
16352 /*
16353 * Generate special jump labels.
16354 */
16355 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
16356 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
16357 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
16358 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
16359 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseGp0))
16360 off = iemNativeEmitRaiseGp0(pReNative, off, idxReturnLabel);
16361 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseNm))
16362 off = iemNativeEmitRaiseNm(pReNative, off, idxReturnLabel);
16363 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseUd))
16364 off = iemNativeEmitRaiseUd(pReNative, off, idxReturnLabel);
16365 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseMf))
16366 off = iemNativeEmitRaiseMf(pReNative, off, idxReturnLabel);
16367 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_RaiseXf))
16368 off = iemNativeEmitRaiseXf(pReNative, off, idxReturnLabel);
16369 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ObsoleteTb))
16370 off = iemNativeEmitObsoleteTb(pReNative, off, idxReturnLabel);
16371 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_NeedCsLimChecking))
16372 off = iemNativeEmitNeedCsLimChecking(pReNative, off, idxReturnLabel);
16373 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_CheckBranchMiss))
16374 off = iemNativeEmitCheckBranchMiss(pReNative, off, idxReturnLabel);
16375 }
16376 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
16377 {
16378 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
16379 return pTb;
16380 }
16381 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
16382 Assert(off <= pReNative->cInstrBufAlloc);
16383
16384 /*
16385      * Make sure all labels have been defined.
16386 */
16387 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
16388#ifdef VBOX_STRICT
16389 uint32_t const cLabels = pReNative->cLabels;
16390 for (uint32_t i = 0; i < cLabels; i++)
16391 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
16392#endif
16393
16394 /*
16395 * Allocate executable memory, copy over the code we've generated.
16396 */
16397 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
16398 if (pTbAllocator->pDelayedFreeHead)
16399 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
16400
16401 PIEMNATIVEINSTR const paFinalInstrBuf = (PIEMNATIVEINSTR)iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR));
16402 AssertReturn(paFinalInstrBuf, pTb);
16403 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
16404
16405 /*
16406 * Apply fixups.
16407 */
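    /* Worked example with hypothetical numbers: an AMD64 Rel32 fixup recorded at
       instruction-buffer offset 0x40 with offAddend -4, whose label ended up at
       offset 0x10, gets 0x10 - 0x40 + (-4) = -0x34 written into its 32-bit
       displacement field by the kIemNativeFixupType_Rel32 case below. */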
16408 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
16409 uint32_t const cFixups = pReNative->cFixups;
16410 for (uint32_t i = 0; i < cFixups; i++)
16411 {
16412 Assert(paFixups[i].off < off);
16413 Assert(paFixups[i].idxLabel < cLabels);
16414 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
16415 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
16416 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
16417 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
16418 switch (paFixups[i].enmType)
16419 {
16420#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
16421 case kIemNativeFixupType_Rel32:
16422 Assert(paFixups[i].off + 4 <= off);
16423 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16424 continue;
16425
16426#elif defined(RT_ARCH_ARM64)
16427 case kIemNativeFixupType_RelImm26At0:
16428 {
16429 Assert(paFixups[i].off < off);
16430 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16431 Assert(offDisp >= -262144 && offDisp < 262144);
16432 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
16433 continue;
16434 }
16435
16436 case kIemNativeFixupType_RelImm19At5:
16437 {
16438 Assert(paFixups[i].off < off);
16439 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16440 Assert(offDisp >= -262144 && offDisp < 262144);
16441 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
16442 continue;
16443 }
16444
16445 case kIemNativeFixupType_RelImm14At5:
16446 {
16447 Assert(paFixups[i].off < off);
16448 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
16449 Assert(offDisp >= -8192 && offDisp < 8192);
16450 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
16451 continue;
16452 }
16453
16454#endif
16455 case kIemNativeFixupType_Invalid:
16456 case kIemNativeFixupType_End:
16457 break;
16458 }
16459 AssertFailed();
16460 }
16461
16462 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBuf, off * sizeof(IEMNATIVEINSTR));
16463 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
16464
16465 /*
16466 * Convert the translation block.
16467 */
16468 RTMemFree(pTb->Thrd.paCalls);
16469 pTb->Native.paInstructions = paFinalInstrBuf;
16470 pTb->Native.cInstructions = off;
16471 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
16472#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
16473     pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
16474 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
16475#endif
16476
16477 Assert(pTbAllocator->cThreadedTbs > 0);
16478 pTbAllocator->cThreadedTbs -= 1;
16479 pTbAllocator->cNativeTbs += 1;
16480 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
16481
16482#ifdef LOG_ENABLED
16483 /*
16484 * Disassemble to the log if enabled.
16485 */
16486 if (LogIs3Enabled())
16487 {
16488 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
16489 iemNativeDisassembleTb(pTb, DBGFR3InfoLogHlp());
16490# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
16491 RTLogFlush(NULL);
16492# endif
16493 }
16494#endif
16495 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
16496
16497 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
16498 return pTb;
16499}
16500